// cart_tmp_gmem/allocator/general.rs

use crate::{
    allocator::{Allocator, Kind},
    block::Block,
    mapping::MappedRange,
    memory::Memory,
    AtomSize, Size,
};
use hal::{device::Device as _, Backend};
use hibitset::{BitSet, BitSetLike as _};
use std::{
    collections::{BTreeSet, HashMap},
    hash::BuildHasherDefault,
    mem,
    ops::Range,
    ptr::NonNull,
    sync::Arc,
    thread,
};

//TODO: const fn
fn max_chunks_per_size() -> usize {
    (mem::size_of::<usize>() * 8).pow(4)
}

/// Memory block allocated from `GeneralAllocator`.
#[derive(Debug)]
pub struct GeneralBlock<B: Backend> {
    block_index: u32,
    chunk_index: u32,
    count: u32,
    memory: Arc<Memory<B>>,
    ptr: Option<NonNull<u8>>,
    range: Range<Size>,
}

unsafe impl<B: Backend> Send for GeneralBlock<B> {}
unsafe impl<B: Backend> Sync for GeneralBlock<B> {}

impl<B: Backend> GeneralBlock<B> {
    /// Get the size of this block.
    pub fn size(&self) -> Size {
        self.range.end - self.range.start
    }
}

impl<B: Backend> Block<B> for GeneralBlock<B> {
    fn properties(&self) -> hal::memory::Properties {
        self.memory.properties()
    }

    fn memory(&self) -> &B::Memory {
        self.memory.raw()
    }

    fn segment(&self) -> hal::memory::Segment {
        hal::memory::Segment {
            offset: self.range.start,
            size: Some(self.range.end - self.range.start),
        }
    }

    fn map<'a>(
        &'a mut self,
        _device: &B::Device,
        segment: hal::memory::Segment,
    ) -> Result<MappedRange<'a, B>, hal::device::MapError> {
        let requested_range = crate::segment_to_sub_range(segment, &self.range)?;
        let mapping_range = match self.memory.non_coherent_atom_size {
            Some(atom) => crate::align_range(&requested_range, atom),
            None => requested_range.clone(),
        };

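        // Note: `self.ptr` (when present) points at `self.range.start` within the
        // chunk's host mapping, so the returned pointer is advanced by the distance
        // from the block start to the (possibly atom-aligned) start of the mapped range.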
        Ok(unsafe {
            MappedRange::from_raw(
                &*self.memory,
                self.ptr
                    .ok_or(hal::device::MapError::MappingFailed)?
                    .as_ptr()
                    .offset((mapping_range.start - self.range.start) as isize),
                mapping_range,
                requested_range,
            )
        })
    }
}

/// Config for `GeneralAllocator`.
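///
/// A rough sketch of constructing a config; the values below are illustrative
/// powers of two, not defaults provided by this module:
///
/// ```ignore
/// let config = GeneralConfig {
///     block_size_granularity: 256,
///     max_chunk_size: 32 * 1024 * 1024,
///     min_device_allocation: 64 * 1024,
/// };
/// ```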
#[derive(Clone, Copy, Debug)]
pub struct GeneralConfig {
    /// All requests are rounded up to a multiple of this value.
    pub block_size_granularity: Size,

    /// Maximum size of a chunk of blocks.
    /// The actual chunk size is `min(max_chunk_size, block_size * blocks_per_chunk)`.
    pub max_chunk_size: Size,

    /// Minimum size of a device allocation.
    pub min_device_allocation: Size,
}

/// No-fragmentation allocator.
/// Suitable for any type of small allocations.
/// Every freed block can be reused.
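///
/// A rough usage sketch (assuming a `device: B::Device`, a `memory_type` with its
/// `memory_properties` obtained from the adapter, and a `config` built as in the
/// `GeneralConfig` example above; the values and setup are illustrative only):
///
/// ```ignore
/// let mut allocator = GeneralAllocator::<B>::new(
///     memory_type,
///     memory_properties,
///     config,
///     non_coherent_atom_size,
/// );
/// // Request 1 KiB aligned to 256 bytes; the second value reports how much new
/// // device memory was allocated to satisfy the request.
/// let (block, allocated) = allocator.alloc(&device, 1024, 256).expect("allocation failed");
/// // ... use the block ...
/// allocator.free(&device, block);
/// ```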
#[derive(Debug)]
pub struct GeneralAllocator<B: Backend> {
    /// Memory type that this allocator allocates.
    memory_type: hal::MemoryTypeId,

    /// Memory properties of the memory type.
    memory_properties: hal::memory::Properties,

    /// All requests are rounded up to a multiple of this value.
    block_size_granularity: Size,

    /// Maximum size of a chunk of blocks.
    max_chunk_size: Size,

    /// Minimum size of a device allocation.
    min_device_allocation: Size,

    /// Per-size entries, each holding the list of chunks for that block size.
    sizes: HashMap<Size, SizeEntry<B>, BuildHasherDefault<fxhash::FxHasher>>,

    /// Ordered set of sizes that have allocated chunks.
    chunks: BTreeSet<Size>,

    non_coherent_atom_size: Option<AtomSize>,
}

//TODO: ensure Send and Sync
unsafe impl<B: Backend> Send for GeneralAllocator<B> {}
unsafe impl<B: Backend> Sync for GeneralAllocator<B> {}

#[derive(Debug)]
struct SizeEntry<B: Backend> {
    /// Total count of allocated blocks with size corresponding to this entry.
    total_blocks: Size,

    /// Bit set of ready (non-exhausted) chunks that still have free blocks.
    ready_chunks: BitSet,

    /// List of chunks.
    chunks: slab::Slab<Chunk<B>>,
}

impl<B: Backend> Default for SizeEntry<B> {
    fn default() -> Self {
        SizeEntry {
            chunks: Default::default(),
            total_blocks: 0,
            ready_chunks: Default::default(),
        }
    }
}

/// A bit mask of block availability.
type BlockMask = u64;

const MIN_BLOCKS_PER_CHUNK: u32 = 8;
const MAX_BLOCKS_PER_CHUNK: u32 = mem::size_of::<BlockMask>() as u32 * 8;
const LARGE_BLOCK_THRESHOLD: Size = 0x10000;

#[test]
fn test_constants() {
    assert!(MIN_BLOCKS_PER_CHUNK < MAX_BLOCKS_PER_CHUNK);
    assert!(LARGE_BLOCK_THRESHOLD * 2 >= MIN_BLOCKS_PER_CHUNK as Size);
}

impl<B: Backend> GeneralAllocator<B> {
    /// Create a new `GeneralAllocator`
    /// for the given `memory_type` with `memory_properties` specified,
    /// with the `GeneralConfig` provided.
    pub fn new(
        memory_type: hal::MemoryTypeId,
        memory_properties: hal::memory::Properties,
        config: GeneralConfig,
        non_coherent_atom_size: Size,
    ) -> Self {
        log::trace!(
            "Create new allocator: type: '{:?}', properties: '{:#?}' config: '{:#?}'",
            memory_type,
            memory_properties,
            config
        );

        assert!(
            config.block_size_granularity.is_power_of_two(),
            "Allocation granularity must be a power of two"
        );
        assert!(
            config.max_chunk_size.is_power_of_two(),
            "Max chunk size must be a power of two"
        );

        assert!(
            config.min_device_allocation.is_power_of_two(),
            "Min device allocation must be a power of two"
        );

        assert!(
            config.min_device_allocation <= config.max_chunk_size,
            "Min device allocation must be less than or equal to max chunk size"
        );

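        // For host-visible but non-coherent memory, raise the block granularity to at
        // least the device's non-coherent atom size (rounded up to a power of two) so
        // that block boundaries stay atom-aligned when mapped ranges are flushed or
        // invalidated.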
        let (block_size_granularity, non_coherent_atom_size) =
            if crate::is_non_coherent_visible(memory_properties) {
                let granularity = non_coherent_atom_size
                    .max(config.block_size_granularity)
                    .next_power_of_two();
                (granularity, AtomSize::new(non_coherent_atom_size))
            } else {
                (config.block_size_granularity, None)
            };

        GeneralAllocator {
            memory_type,
            memory_properties,
            block_size_granularity,
            max_chunk_size: config.max_chunk_size,
            min_device_allocation: config.min_device_allocation,
            sizes: HashMap::default(),
            chunks: BTreeSet::new(),
            non_coherent_atom_size,
        }
    }

    /// Maximum block size that can be sub-allocated from a chunk which is itself a block.
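    ///
    /// For example, with `max_chunk_size = 32 MiB` and `MIN_BLOCKS_PER_CHUNK = 8`
    /// this is 4 MiB. Chunks whose minimum size (`MIN_BLOCKS_PER_CHUNK * block_size`)
    /// would exceed this limit are allocated directly from the device instead of
    /// being carved out of a larger block.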
    pub fn max_sub_block_size(&self) -> Size {
        self.max_chunk_size / MIN_BLOCKS_PER_CHUNK as Size
    }

    /// Allocate memory chunk from device.
    fn alloc_chunk_from_device(
        &self,
        device: &B::Device,
        block_size: Size,
        count: u32,
    ) -> Result<Chunk<B>, hal::device::AllocationError> {
        log::trace!(
            "Allocate chunk with {} blocks size {} from device",
            count,
            block_size
        );

        let (memory, ptr) = unsafe {
            super::allocate_memory_helper(
                device,
                self.memory_type,
                block_size * count as Size,
                self.memory_properties,
                self.non_coherent_atom_size,
            )?
        };

        Ok(Chunk::from_memory(block_size, memory, ptr))
    }

    /// Allocate memory chunk for given block size.
    ///
    /// The chunk will be aligned to the `block_size`.
    fn alloc_chunk(
        &mut self,
        device: &B::Device,
        block_size: Size,
        requested_count: u32,
    ) -> Result<(Chunk<B>, Size), hal::device::AllocationError> {
        log::trace!(
            "Allocate chunk for roughly {} blocks of size {}",
            requested_count,
            block_size
        );

        let min_chunk_size = MIN_BLOCKS_PER_CHUNK as Size * block_size;
        let max_chunk_size = MAX_BLOCKS_PER_CHUNK as Size * block_size;
        let clamped_count = requested_count
            .next_power_of_two() // makes it more re-usable
            .max(MIN_BLOCKS_PER_CHUNK)
            .min(MAX_BLOCKS_PER_CHUNK);
        let requested_chunk_size = clamped_count as Size * block_size;

        // If the smallest possible chunk size exceeds this allocator's maximum sub-block size
        if min_chunk_size > self.max_sub_block_size() {
            // Allocate memory block from the device.
            let chunk = self.alloc_chunk_from_device(device, block_size, clamped_count)?;
            return Ok((chunk, requested_chunk_size));
        }

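        // Otherwise carve the chunk out of an existing block: prefer the largest
        // already-registered chunk size in [min_chunk_size, max_chunk_size] that is a
        // multiple of `block_size`, and fall back to allocating a block of the
        // requested chunk size.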
        let (block, allocated) = match self
            .chunks
            .range(min_chunk_size..=max_chunk_size)
            .rfind(|&size| size % block_size == 0)
        {
            Some(&chunk_size) => {
                // Allocate block for the chunk.
                self.alloc_from_entry(device, chunk_size, 1, block_size)?
            }
            None => {
                // Allocate a new block for the chunk.
                self.alloc_block(device, requested_chunk_size, block_size)?
            }
        };

        Ok((Chunk::from_block(block_size, block), allocated))
    }

    /// Allocate blocks from particular chunk.
    fn alloc_from_chunk(
        chunks: &mut slab::Slab<Chunk<B>>,
        chunk_index: u32,
        block_size: Size,
        count: u32,
        align: Size,
    ) -> Option<GeneralBlock<B>> {
        log::trace!(
            "Allocate {} consecutive blocks of size {} from chunk {}",
            count,
            block_size,
            chunk_index
        );

        let chunk = &mut chunks[chunk_index as usize];
        let block_index = chunk.acquire_blocks(count, block_size, align)?;
        let block_range = chunk.blocks_range(block_size, block_index, count);

        let block_start = block_range.start;
        debug_assert_eq!((block_range.end - block_start) % count as Size, 0);

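        // If the chunk is host-mapped, derive the block's mapping pointer by advancing
        // the chunk's base pointer to the block's starting offset within the chunk.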
        Some(GeneralBlock {
            range: block_range,
            memory: Arc::clone(chunk.shared_memory()),
            block_index,
            chunk_index,
            count,
            ptr: chunk.mapping_ptr().map(|ptr| unsafe {
                let offset = (block_start - chunk.range().start) as isize;
                NonNull::new_unchecked(ptr.as_ptr().offset(offset))
            }),
        })
    }

    /// Allocate `count` blocks from the size entry.
    ///
    /// Note: at this level, `align` is no longer necessarily a power of 2.
    fn alloc_from_entry(
        &mut self,
        device: &B::Device,
        block_size: Size,
        count: u32,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        log::trace!(
            "Allocate {} consecutive blocks for size {} from the entry",
            count,
            block_size
        );

        debug_assert!(count < MIN_BLOCKS_PER_CHUNK);
        debug_assert_eq!(
            block_size % align,
            0,
            "Requested entry block size {} is not aligned to {}",
            block_size,
            align
        );
        let size_entry = self.sizes.entry(block_size).or_default();

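        // First try every chunk that is known to still have free blocks; a chunk can
        // still fail here if it cannot fit `count` consecutive, suitably aligned blocks.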
        for chunk_index in (&size_entry.ready_chunks).iter() {
            if let Some(block) = Self::alloc_from_chunk(
                &mut size_entry.chunks,
                chunk_index,
                block_size,
                count,
                align,
            ) {
                return Ok((block, 0));
            }
        }

        if size_entry.chunks.vacant_entry().key() > max_chunks_per_size() {
            return Err(hal::device::OutOfMemory::Host.into());
        }

        // This is an estimated block count, and it's a hint.
        // The actual count will be clamped between MIN and MAX.
        let estimated_block_count = size_entry.total_blocks as u32;
        let (chunk, allocated) = self.alloc_chunk(device, block_size, estimated_block_count)?;
        log::trace!("\tChunk init mask: 0x{:x}", chunk.blocks);
        let size_entry = self.sizes.entry(block_size).or_default();
        let chunk_index = size_entry.chunks.insert(chunk) as u32;

        let block = Self::alloc_from_chunk(
            &mut size_entry.chunks,
            chunk_index,
            block_size,
            count,
            align,
        )
        .expect("New chunk should yield blocks");

        if !size_entry.chunks[chunk_index as usize].is_exhausted() {
            size_entry.ready_chunks.add(chunk_index);
        }

        Ok((block, allocated))
    }

    /// Allocate block.
    fn alloc_block(
        &mut self,
        device: &B::Device,
        block_size: Size,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        log::trace!("Allocate block of size {}", block_size);

        debug_assert_eq!(
            block_size & (self.block_size_granularity - 1),
            0,
            "Requested block size {} is not aligned to the size granularity {}",
            block_size,
            self.block_size_granularity
        );
        debug_assert_eq!(
            block_size % align,
            0,
            "Requested block size {} is not aligned to {}",
            block_size,
            align
        );
        let size_entry = self.sizes.entry(block_size).or_default();
        size_entry.total_blocks += 1;

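        // Heuristic: `overhead` is non-zero while fewer than MIN_BLOCKS_PER_CHUNK blocks of
        // this size have ever been requested. In that case a large block is assembled from
        // several consecutive blocks of a smaller size instead of dedicating a whole chunk
        // (which would mostly sit unused) to this rarely-used block size.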
        let overhead = (MIN_BLOCKS_PER_CHUNK as Size - 1) / size_entry.total_blocks;
        if overhead >= 1 && block_size >= LARGE_BLOCK_THRESHOLD {
            // The ideal size is chosen in such a way that the required `count`
            // is less than `MIN_BLOCKS_PER_CHUNK`.
            let ideal_chunk_size = crate::align_size(
                block_size * 2 / MIN_BLOCKS_PER_CHUNK as Size,
                crate::AtomSize::new(align).unwrap(),
            );
            let chunk_size = match self
                .chunks
                .range(ideal_chunk_size..block_size * overhead)
                .find(|&size| size % align == 0)
            {
                Some(&size) => size,
                None => {
                    self.chunks.insert(ideal_chunk_size);
                    ideal_chunk_size
                }
            };

            self.alloc_from_entry(
                device,
                chunk_size,
                ((block_size - 1) / chunk_size + 1) as u32,
                align,
            )
        } else {
            self.chunks.insert(block_size);
            self.alloc_from_entry(device, block_size, 1, align)
        }
    }

    fn free_chunk(&mut self, device: &B::Device, chunk: Chunk<B>, block_size: Size) -> Size {
        log::trace!("Free chunk: {:#?}", chunk);
        assert!(chunk.is_unused(block_size));
        match chunk.flavor {
            ChunkFlavor::Dedicated { memory, .. } => {
                let size = memory.size();
                match Arc::try_unwrap(memory) {
                    Ok(mem) => unsafe {
                        if mem.is_mappable() {
                            device.unmap_memory(mem.raw());
                        }
                        device.free_memory(mem.into_raw());
                    },
                    Err(_) => {
                        log::error!("Allocated `Chunk` was freed, but its memory is still shared and will never be destroyed");
                    }
                }
                size
            }
            ChunkFlavor::General(block) => self.free(device, block),
        }
    }

    fn free_block(&mut self, device: &B::Device, block: GeneralBlock<B>) -> Size {
        log::trace!("Free block: {:#?}", block);

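        // `block.size()` covers `count` consecutive blocks, so divide to recover the
        // per-block size that keys the `sizes` map.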
        let block_size = block.size() / block.count as Size;
        let size_entry = self
            .sizes
            .get_mut(&block_size)
            .expect("Unable to get size entry from which block was allocated");
        let chunk_index = block.chunk_index;
        let chunk = &mut size_entry.chunks[chunk_index as usize];
        let block_index = block.block_index;
        let count = block.count;

        chunk.release_blocks(block_index, count);
        if chunk.is_unused(block_size) {
            size_entry.ready_chunks.remove(chunk_index);
            let chunk = size_entry.chunks.remove(chunk_index as usize);
            drop(block); // it keeps an Arc reference to the chunk's memory
            self.free_chunk(device, chunk, block_size)
        } else {
            size_entry.ready_chunks.add(chunk_index);
            0
        }
    }

    /// Free the contents of the allocator.
    /// Currently a no-op that reports 0 bytes freed.
    pub fn clear(&mut self, _device: &B::Device) -> Size {
        0
    }
}

impl<B: Backend> Allocator<B> for GeneralAllocator<B> {
    type Block = GeneralBlock<B>;

    const KIND: Kind = Kind::General;

    fn alloc(
        &mut self,
        device: &B::Device,
        size: Size,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        debug_assert!(align.is_power_of_two());
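        // Round `size` up to a multiple of both `align` and the block size granularity.
        // Since both are powers of two, OR-ing their (power-of-two - 1) masks onto
        // `size - 1` and adding 1 rounds up to the larger of the two alignments in one
        // step; e.g. size = 100, align = 16, granularity = 64 gives 128.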
        let aligned_size = ((size - 1) | (align - 1) | (self.block_size_granularity - 1)) + 1;
        let map_aligned_size = match self.non_coherent_atom_size {
            Some(atom) => crate::align_size(aligned_size, atom),
            None => aligned_size,
        };

        log::trace!(
            "Allocate general block: size: {}, align: {}, aligned size: {}, type: {}",
            size,
            align,
            map_aligned_size,
            self.memory_type.0
        );

        self.alloc_block(device, map_aligned_size, align)
    }

    fn free(&mut self, device: &B::Device, block: GeneralBlock<B>) -> Size {
        self.free_block(device, block)
    }
}

impl<B: Backend> Drop for GeneralAllocator<B> {
    fn drop(&mut self) {
        for (block_size, entry) in self.sizes.drain() {
            if !thread::panicking() {
                assert_eq!(entry.chunks.len(), 0, "SizeEntry({}) is still used", block_size);
            } else {
                log::error!("Memory leak: SizeEntry({}) is still used", block_size);
            }
        }
    }
}

/// Backing storage of a chunk.
#[derive(Debug)]
enum ChunkFlavor<B: Backend> {
    /// Allocated directly from the device.
    Dedicated {
        memory: Arc<Memory<B>>,
        ptr: Option<NonNull<u8>>,
    },
    /// Allocated as a block from a chunk of bigger blocks.
    General(GeneralBlock<B>),
}

#[derive(Debug)]
struct Chunk<B: Backend> {
    flavor: ChunkFlavor<B>,
    /// Each bit corresponds to a block, which is free if the bit is 1.
    blocks: BlockMask,
}

impl<B: Backend> Chunk<B> {
    fn from_memory(block_size: Size, memory: Memory<B>, ptr: Option<NonNull<u8>>) -> Self {
        let blocks = memory.size() / block_size;
        debug_assert!(blocks <= MAX_BLOCKS_PER_CHUNK as Size);

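        // Build a mask with the low `blocks` bits set (all blocks free). Written as
        // `(high_bit - 1) | high_bit` rather than `(1 << blocks) - 1` so it does not
        // overflow when `blocks == MAX_BLOCKS_PER_CHUNK` (i.e. all 64 bits).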
        let high_bit = 1 << (blocks - 1);

        Chunk {
            flavor: ChunkFlavor::Dedicated {
                memory: Arc::new(memory),
                ptr,
            },
            blocks: (high_bit - 1) | high_bit,
        }
    }

    fn from_block(block_size: Size, chunk_block: GeneralBlock<B>) -> Self {
        let blocks = (chunk_block.size() / block_size).min(MAX_BLOCKS_PER_CHUNK as Size);

        let high_bit = 1 << (blocks - 1);

        Chunk {
            flavor: ChunkFlavor::General(chunk_block),
            blocks: (high_bit - 1) | high_bit,
        }
    }

    fn shared_memory(&self) -> &Arc<Memory<B>> {
        match self.flavor {
            ChunkFlavor::Dedicated { ref memory, .. } => memory,
            ChunkFlavor::General(ref block) => &block.memory,
        }
    }

    fn range(&self) -> Range<Size> {
        match self.flavor {
            ChunkFlavor::Dedicated { ref memory, .. } => 0..memory.size(),
            ChunkFlavor::General(ref block) => block.range.clone(),
        }
    }

    // Get the byte range of `count` blocks starting at `block_index`.
    fn blocks_range(&self, block_size: Size, block_index: u32, count: u32) -> Range<Size> {
        let range = self.range();
        let start = range.start + block_size * block_index as Size;
        let end = start + block_size * count as Size;
        debug_assert!(end <= range.end);
        start..end
    }

    /// Check if all blocks in the chunk are free.
    fn is_unused(&self, block_size: Size) -> bool {
        let range = self.range();
        let blocks = ((range.end - range.start) / block_size).min(MAX_BLOCKS_PER_CHUNK as Size);

        let high_bit = 1 << (blocks - 1);
        let mask = (high_bit - 1) | high_bit;

        debug_assert!(self.blocks <= mask);
        self.blocks == mask
    }

    /// Check if there are no free blocks left.
    fn is_exhausted(&self) -> bool {
        self.blocks == 0
    }

    fn acquire_blocks(&mut self, count: u32, block_size: Size, align: Size) -> Option<u32> {
        debug_assert!(count > 0 && count <= MAX_BLOCKS_PER_CHUNK);

        // Holds a bit-array of all positions with `count` free blocks.
        let mut blocks: BlockMask = !0;
        for i in 0..count {
            blocks &= self.blocks >> i;
        }
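        // After the loop, bit `i` of `blocks` is set iff blocks `i..i + count` are all
        // free. For example, with `self.blocks = 0b1011` and `count = 2`:
        // `0b1011 & (0b1011 >> 1) = 0b0001`, i.e. only the run starting at index 0 fits.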
        // Find a position in `blocks` that is aligned.
        while blocks != 0 {
            let index = blocks.trailing_zeros();
            blocks ^= 1 << index;

            if (index as Size * block_size) & (align - 1) == 0 {
                let mask = ((1 << count) - 1) << index;
                debug_assert_eq!(self.blocks & mask, mask);
                self.blocks ^= mask;
                log::trace!(
                    "Chunk acquired at {}, mask: 0x{:x} -> 0x{:x}",
                    index,
                    mask,
                    self.blocks
                );
                return Some(index);
            }
        }
        None
    }

    fn release_blocks(&mut self, index: u32, count: u32) {
        debug_assert!(index + count <= MAX_BLOCKS_PER_CHUNK);
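        // `mask` has `count` one-bits starting at `index`; these blocks must currently
        // be marked as acquired (zero) before they are returned to the free mask.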
        let mask = ((1 << count) - 1) << index;
        debug_assert_eq!(self.blocks & mask, 0);
        self.blocks |= mask;
        log::trace!(
            "Chunk released at {}, mask: 0x{:x} -> 0x{:x}",
            index,
            mask,
            self.blocks
        );
    }

    fn mapping_ptr(&self) -> Option<NonNull<u8>> {
        match self.flavor {
            ChunkFlavor::Dedicated { ptr, .. } => ptr,
            ChunkFlavor::General(ref block) => block.ptr,
        }
    }
}