gfx_memory/allocator/
general.rs

use crate::{
    allocator::{Allocator, Kind},
    block::Block,
    mapping::MappedRange,
    memory::Memory,
    AtomSize, Size,
};

use bit_set::BitSet;
use hal::{device::Device as _, Backend};
use slab::Slab;

use std::{
    collections::{BTreeSet, HashMap},
    hash::BuildHasherDefault,
    mem,
    ops::Range,
    ptr::NonNull,
    sync::Arc,
    thread,
};

//TODO: const fn
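// On 64-bit targets this evaluates to 64^4 = 16_777_216 chunks per size class.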
fn max_chunks_per_size() -> usize {
    (mem::size_of::<usize>() * 8).pow(4)
}

/// Memory block allocated from `GeneralAllocator`.
#[derive(Debug)]
pub struct GeneralBlock<B: Backend> {
    block_index: u32,
    chunk_index: u32,
    count: u32,
    memory: Arc<Memory<B>>,
    ptr: Option<NonNull<u8>>,
    range: Range<Size>,
}

unsafe impl<B: Backend> Send for GeneralBlock<B> {}
unsafe impl<B: Backend> Sync for GeneralBlock<B> {}

impl<B: Backend> GeneralBlock<B> {
    /// Get the size of this block.
    pub fn size(&self) -> Size {
        self.range.end - self.range.start
    }
}

impl<B: Backend> Block<B> for GeneralBlock<B> {
    fn properties(&self) -> hal::memory::Properties {
        self.memory.properties()
    }

    fn memory(&self) -> &B::Memory {
        self.memory.raw()
    }

    fn segment(&self) -> hal::memory::Segment {
        hal::memory::Segment {
            offset: self.range.start,
            size: Some(self.range.end - self.range.start),
        }
    }

    fn map<'a>(
        &'a mut self,
        _device: &B::Device,
        segment: hal::memory::Segment,
    ) -> Result<MappedRange<'a, B>, hal::device::MapError> {
        let requested_range = crate::segment_to_sub_range(segment, &self.range)?;
        let mapping_range = match self.memory.non_coherent_atom_size {
            Some(atom) => crate::align_range(&requested_range, atom),
            None => requested_range.clone(),
        };

        Ok(unsafe {
            MappedRange::from_raw(
                &*self.memory,
                self.ptr
                    .ok_or(hal::device::MapError::MappingFailed)?
                    .as_ptr()
                    .offset((mapping_range.start - self.range.start) as isize),
                mapping_range,
                requested_range,
            )
        })
    }
}

/// Config for `GeneralAllocator`.
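///
/// # Example
///
/// A minimal sketch with illustrative values (not tuned recommendations);
/// all three fields must be powers of two:
///
/// ```ignore
/// let config = GeneralConfig {
///     block_size_granularity: 256,
///     max_chunk_size: 32 * 1024 * 1024,
///     min_device_allocation: 1024 * 1024,
/// };
/// ```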
#[derive(Clone, Copy, Debug)]
pub struct GeneralConfig {
    /// All requests are rounded up to a multiple of this value.
    pub block_size_granularity: Size,

    /// Maximum size of a chunk of blocks.
    /// The actual chunk size is `min(max_chunk_size, block_size * blocks_per_chunk)`.
    pub max_chunk_size: Size,

    /// Minimum size of a device allocation.
    pub min_device_allocation: Size,
}

/// No-fragmentation allocator.
/// Suitable for any type of small allocation.
/// Every freed block can be reused.
#[derive(Debug)]
pub struct GeneralAllocator<B: Backend> {
    /// Memory type that this allocator allocates from.
    memory_type: hal::MemoryTypeId,

    /// Memory properties of the memory type.
    memory_properties: hal::memory::Properties,

    /// All requests are rounded up to a multiple of this value.
    block_size_granularity: Size,

    /// Maximum size of a chunk of blocks.
    max_chunk_size: Size,

    /// Minimum size of a device allocation.
    min_device_allocation: Size,

    /// Chunk lists.
    sizes: HashMap<Size, SizeEntry<B>, BuildHasherDefault<fxhash::FxHasher>>,

    /// Ordered set of sizes that have allocated chunks.
    chunks: BTreeSet<Size>,

    non_coherent_atom_size: Option<AtomSize>,
}

//TODO: ensure Send and Sync
unsafe impl<B: Backend> Send for GeneralAllocator<B> {}
unsafe impl<B: Backend> Sync for GeneralAllocator<B> {}

#[derive(Debug)]
struct SizeEntry<B: Backend> {
    /// One bit per ready (non-exhausted) chunk that still has free blocks.
    ready_chunks: BitSet,

    /// List of chunks.
    chunks: Slab<Chunk<B>>,
}

impl<B: Backend> Default for SizeEntry<B> {
    fn default() -> Self {
        SizeEntry {
            chunks: Slab::new(),
            ready_chunks: BitSet::new(),
        }
    }
}

impl<B: Backend> SizeEntry<B> {
    fn next_block_count(&self, block_size: Size) -> u32 {
        self.chunks.iter().fold(1u32, |max, (_, chunk)| {
            (chunk.num_blocks(block_size) as u32).max(max)
        }) * 2
    }
}

/// A bit mask of block availability.
type BlockMask = u64;

const MIN_BLOCKS_PER_CHUNK: u32 = 8;
const MAX_BLOCKS_PER_CHUNK: u32 = mem::size_of::<BlockMask>() as u32 * 8;
const LARGE_BLOCK_THRESHOLD: Size = 0x10000;

#[test]
fn test_constants() {
    assert!(MIN_BLOCKS_PER_CHUNK < MAX_BLOCKS_PER_CHUNK);
    assert!(LARGE_BLOCK_THRESHOLD * 2 >= MIN_BLOCKS_PER_CHUNK as Size);
}

impl<B: Backend> GeneralAllocator<B> {
    /// Create a new `GeneralAllocator` for the given `memory_type`
    /// with the specified `memory_properties`, configured by the
    /// provided `GeneralConfig`.
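    ///
    /// # Example
    ///
    /// A minimal sketch, assuming a `hal` device, a memory type id with its
    /// properties, and a `config` (see `GeneralConfig`) are already at hand;
    /// `256` stands in for the real non-coherent atom size reported by the
    /// adapter:
    ///
    /// ```ignore
    /// let mut allocator = GeneralAllocator::<B>::new(memory_type, properties, config, 256);
    /// let (block, _allocated) = allocator.alloc(&device, 1024, 256)?;
    /// // ... bind resources to `block` ...
    /// allocator.free(&device, block);
    /// ```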
    pub fn new(
        memory_type: hal::MemoryTypeId,
        memory_properties: hal::memory::Properties,
        config: GeneralConfig,
        non_coherent_atom_size: Size,
    ) -> Self {
        log::trace!(
            "Create new allocator: type: '{:?}', properties: '{:#?}', config: '{:#?}'",
            memory_type,
            memory_properties,
            config
        );

        assert!(
            config.block_size_granularity.is_power_of_two(),
            "Allocation granularity must be a power of two"
        );
        assert!(
            config.max_chunk_size.is_power_of_two(),
            "Max chunk size must be a power of two"
        );

        assert!(
            config.min_device_allocation.is_power_of_two(),
            "Min device allocation must be a power of two"
        );

        assert!(
            config.min_device_allocation <= config.max_chunk_size,
            "Min device allocation must be less than or equal to max chunk size"
        );

        let (block_size_granularity, non_coherent_atom_size) =
            if crate::is_non_coherent_visible(memory_properties) {
                let granularity = non_coherent_atom_size
                    .max(config.block_size_granularity)
                    .next_power_of_two();
                (granularity, AtomSize::new(non_coherent_atom_size))
            } else {
                (config.block_size_granularity, None)
            };

        GeneralAllocator {
            memory_type,
            memory_properties,
            block_size_granularity,
            max_chunk_size: config.max_chunk_size,
            min_device_allocation: config.min_device_allocation,
            sizes: HashMap::default(),
            chunks: BTreeSet::new(),
            non_coherent_atom_size,
        }
    }

    /// Maximum size of a block that can be allocated from a chunk
    /// that is itself another block.
    pub fn max_sub_block_size(&self) -> Size {
        self.max_chunk_size / MIN_BLOCKS_PER_CHUNK as Size
    }

    /// Allocate memory chunk from device.
    fn alloc_chunk_from_device(
        &self,
        device: &B::Device,
        block_size: Size,
        count: u32,
    ) -> Result<Chunk<B>, hal::device::AllocationError> {
        log::trace!(
            "Allocate chunk with {} blocks size {} from device",
            count,
            block_size
        );

        let (memory, ptr) = unsafe {
            super::allocate_memory_helper(
                device,
                self.memory_type,
                block_size * count as Size,
                self.memory_properties,
                self.non_coherent_atom_size,
            )?
        };

        Ok(Chunk::from_memory(block_size, memory, ptr))
    }

    /// Allocate memory chunk for given block size.
    ///
    /// The chunk will be aligned to the `block_size`.
    fn alloc_chunk(
        &mut self,
        device: &B::Device,
        block_size: Size,
        requested_count: u32,
    ) -> Result<(Chunk<B>, Size), hal::device::AllocationError> {
        log::trace!(
            "Allocate chunk for roughly {} blocks of size {}",
            requested_count,
            block_size
        );

        let min_chunk_size = MIN_BLOCKS_PER_CHUNK as Size * block_size;
        let max_chunk_size = MAX_BLOCKS_PER_CHUNK as Size * block_size;
        let clamped_count = requested_count
            .next_power_of_two() // makes it more re-usable
            .max(MIN_BLOCKS_PER_CHUNK)
            .min(MAX_BLOCKS_PER_CHUNK);
        let requested_chunk_size = clamped_count as Size * block_size;

        // If the smallest possible chunk size is larger than this allocator's
        // maximum sub-block size, the chunk can't be carved out of another block.
        if min_chunk_size > self.max_sub_block_size() {
            // Allocate memory block from the device.
            let chunk = self.alloc_chunk_from_device(device, block_size, clamped_count)?;
            return Ok((chunk, requested_chunk_size));
        }

        let (block, allocated) = match self
            .chunks
            .range(min_chunk_size..=max_chunk_size)
            .rfind(|&size| size % block_size == 0)
        {
            Some(&chunk_size) => {
                // Allocate block for the chunk.
                self.alloc_from_entry(device, chunk_size, 1, block_size)?
            }
            None if requested_chunk_size > self.min_device_allocation => {
                // Allocate memory block from the device.
                // Note: if we called into `alloc_block` instead, we would keep
                // requesting larger and larger blocks until hitting the ceiling.
                let chunk = self.alloc_chunk_from_device(device, block_size, clamped_count)?;
                return Ok((chunk, requested_chunk_size));
            }
            None => {
                // Allocate a new block for the chunk.
                self.alloc_block(device, requested_chunk_size, block_size)?
            }
        };

        Ok((Chunk::from_block(block_size, block), allocated))
    }

    /// Allocate blocks from a particular chunk.
    fn alloc_from_chunk(
        chunks: &mut Slab<Chunk<B>>,
        chunk_index: usize,
        block_size: Size,
        count: u32,
        align: Size,
    ) -> Option<GeneralBlock<B>> {
        log::trace!(
            "Allocate {} consecutive blocks of size {} from chunk {}",
            count,
            block_size,
            chunk_index
        );

        let chunk = &mut chunks[chunk_index];
        let block_index = chunk.acquire_blocks(count, block_size, align)?;
        let block_range = chunk.blocks_range(block_size, block_index, count);

        let block_start = block_range.start;
        debug_assert_eq!((block_range.end - block_start) % count as Size, 0);

        Some(GeneralBlock {
            range: block_range,
            memory: Arc::clone(chunk.shared_memory()),
            block_index,
            chunk_index: chunk_index as u32,
            count,
            ptr: chunk.mapping_ptr().map(|ptr| unsafe {
                let offset = (block_start - chunk.range().start) as isize;
                NonNull::new_unchecked(ptr.as_ptr().offset(offset))
            }),
        })
    }

    /// Allocate `count` blocks from size entry.
    ///
    /// Note: at this level, `align` is no longer a power of 2.
    fn alloc_from_entry(
        &mut self,
        device: &B::Device,
        block_size: Size,
        count: u32,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        log::trace!(
            "Allocate {} consecutive blocks for size {} from the entry",
            count,
            block_size
        );

        debug_assert!(count < MIN_BLOCKS_PER_CHUNK);
        debug_assert_eq!(
            block_size % align,
            0,
            "Requested entry block size {} is not aligned to {}",
            block_size,
            align
        );
        let size_entry = self.sizes.entry(block_size).or_default();

        for chunk_index in (&size_entry.ready_chunks).iter() {
            if let Some(block) = Self::alloc_from_chunk(
                &mut size_entry.chunks,
                chunk_index,
                block_size,
                count,
                align,
            ) {
                return Ok((block, 0));
            }
        }

        if size_entry.chunks.vacant_entry().key() > max_chunks_per_size() {
            return Err(hal::device::OutOfMemory::Host.into());
        }

        // The estimated block count is a hint.
        // The actual count will be clamped between MIN and MAX.
        let estimated_block_count = size_entry.next_block_count(block_size);
        let (chunk, allocated) = self.alloc_chunk(device, block_size, estimated_block_count)?;
        log::trace!("\tChunk init mask: 0x{:x}", chunk.blocks);
        let size_entry = self.sizes.entry(block_size).or_default();
        let chunk_index = size_entry.chunks.insert(chunk);

        let block = Self::alloc_from_chunk(
            &mut size_entry.chunks,
            chunk_index,
            block_size,
            count,
            align,
        )
        .expect("New chunk should yield blocks");

        if !size_entry.chunks[chunk_index].is_exhausted() {
            size_entry.ready_chunks.insert(chunk_index);
        }

        Ok((block, allocated))
    }

    /// Allocate block.
    fn alloc_block(
        &mut self,
        device: &B::Device,
        block_size: Size,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        log::trace!("Allocate block of size {}", block_size);

        debug_assert_eq!(
            block_size & (self.block_size_granularity - 1),
            0,
            "Requested block size {} is not aligned to the size granularity {}",
            block_size,
            self.block_size_granularity
        );
        debug_assert_eq!(
            block_size % align,
            0,
            "Requested block size {} is not aligned to {}",
            block_size,
            align
        );
        let size_entry = self.sizes.entry(block_size).or_default();

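        // `overhead >= 1` exactly when `next_block_count` is below `MIN_BLOCKS_PER_CHUNK`,
        // i.e. when a chunk dedicated to this block size would hold only a few blocks.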
        let overhead = (MIN_BLOCKS_PER_CHUNK - 1) / size_entry.next_block_count(block_size);
        if overhead >= 1 && block_size >= LARGE_BLOCK_THRESHOLD {
            // This is chosen in such a way that the required `count`
            // is less than `MIN_BLOCKS_PER_CHUNK`.
            let ideal_chunk_size = crate::align_size(
                block_size * 2 / MIN_BLOCKS_PER_CHUNK as Size,
                crate::AtomSize::new(align).unwrap(),
            );
            let chunk_size = match self
                .chunks
                .range(ideal_chunk_size..block_size * overhead as Size)
                .find(|&size| size % align == 0)
            {
                Some(&size) => size,
                None => {
                    self.chunks.insert(ideal_chunk_size);
                    ideal_chunk_size
                }
            };

            self.alloc_from_entry(
                device,
                chunk_size,
                ((block_size - 1) / chunk_size + 1) as u32,
                align,
            )
        } else {
            self.chunks.insert(block_size);
            self.alloc_from_entry(device, block_size, 1, align)
        }
    }

    fn free_chunk(&mut self, device: &B::Device, chunk: Chunk<B>, block_size: Size) -> Size {
        log::trace!("Free chunk: {:#?}", chunk);
        assert!(chunk.is_unused(block_size));
        match chunk.flavor {
            ChunkFlavor::Dedicated { memory, .. } => {
                let size = memory.size();
                match Arc::try_unwrap(memory) {
                    Ok(mem) => unsafe {
                        if mem.is_mappable() {
                            device.unmap_memory(mem.raw());
                        }
                        device.free_memory(mem.into_raw());
                    },
                    Err(_) => {
                        log::error!("Allocated `Chunk` was freed, but its memory is still shared and will never be destroyed");
                    }
                }
                size
            }
            ChunkFlavor::General(block) => self.free(device, block),
        }
    }

    fn free_block(&mut self, device: &B::Device, block: GeneralBlock<B>) -> Size {
        log::trace!("Free block: {:#?}", block);

        let block_size = block.size() / block.count as Size;
        let size_entry = self
            .sizes
            .get_mut(&block_size)
            .expect("Unable to get the size entry from which the block was allocated");
        let chunk_index = block.chunk_index as usize;
        let chunk = &mut size_entry.chunks[chunk_index];
        let block_index = block.block_index;
        let count = block.count;

        chunk.release_blocks(block_index, count);
        if chunk.is_unused(block_size) {
            size_entry.ready_chunks.remove(chunk_index);
            let chunk = size_entry.chunks.remove(chunk_index);
            drop(block); // it keeps an `Arc` reference to the chunk's memory
            self.free_chunk(device, chunk, block_size)
        } else {
            size_entry.ready_chunks.insert(chunk_index);
            0
        }
    }

    /// Free the contents of the allocator.
    pub fn clear(&mut self, _device: &B::Device) -> Size {
        0
    }
}

impl<B: Backend> Allocator<B> for GeneralAllocator<B> {
    type Block = GeneralBlock<B>;

    const KIND: Kind = Kind::General;

    fn alloc(
        &mut self,
        device: &B::Device,
        size: Size,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        debug_assert!(align.is_power_of_two());
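        // Round `size` up to a multiple of both `align` and the block size
        // granularity: OR-ing the (power-of-two) masks sets every bit below
        // the largest of them, and `+ 1` carries into the next multiple.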
        let aligned_size = ((size - 1) | (align - 1) | (self.block_size_granularity - 1)) + 1;
        let map_aligned_size = match self.non_coherent_atom_size {
            Some(atom) => crate::align_size(aligned_size, atom),
            None => aligned_size,
        };

        log::trace!(
            "Allocate general block: size: {}, align: {}, aligned size: {}, type: {}",
            size,
            align,
            map_aligned_size,
            self.memory_type.0
        );

        self.alloc_block(device, map_aligned_size, align)
    }

    fn free(&mut self, device: &B::Device, block: GeneralBlock<B>) -> Size {
        self.free_block(device, block)
    }
}

impl<B: Backend> Drop for GeneralAllocator<B> {
    fn drop(&mut self) {
        for (index, size) in self.sizes.drain() {
            if !thread::panicking() {
                assert_eq!(size.chunks.len(), 0, "SizeEntry({}) is still used", index);
            } else {
                log::error!("Memory leak: SizeEntry({}) is still used", index);
            }
        }
    }
}

/// Backing storage allocated for a chunk.
#[derive(Debug)]
enum ChunkFlavor<B: Backend> {
    /// Allocated directly from the device.
    Dedicated {
        memory: Arc<Memory<B>>,
        ptr: Option<NonNull<u8>>,
    },
    /// Allocated from a chunk of bigger blocks.
    General(GeneralBlock<B>),
}

#[derive(Debug)]
struct Chunk<B: Backend> {
    flavor: ChunkFlavor<B>,
    /// Each bit corresponds to a block, which is free if the bit is 1.
    blocks: BlockMask,
}

impl<B: Backend> Chunk<B> {
    fn from_memory(block_size: Size, memory: Memory<B>, ptr: Option<NonNull<u8>>) -> Self {
        let blocks = memory.size() / block_size;
        debug_assert!(blocks <= MAX_BLOCKS_PER_CHUNK as Size);

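        // Build a mask with the low `blocks` bits set, marking every block free.
        // (Done in two steps because `1 << blocks` would overflow the mask when
        // `blocks == MAX_BLOCKS_PER_CHUNK`.)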
        let high_bit = 1 << (blocks - 1);

        Chunk {
            flavor: ChunkFlavor::Dedicated {
                memory: Arc::new(memory),
                ptr,
            },
            blocks: (high_bit - 1) | high_bit,
        }
    }

    fn from_block(block_size: Size, chunk_block: GeneralBlock<B>) -> Self {
        let blocks = (chunk_block.size() / block_size).min(MAX_BLOCKS_PER_CHUNK as Size);

        let high_bit = 1 << (blocks - 1);

        Chunk {
            flavor: ChunkFlavor::General(chunk_block),
            blocks: (high_bit - 1) | high_bit,
        }
    }

    fn shared_memory(&self) -> &Arc<Memory<B>> {
        match self.flavor {
            ChunkFlavor::Dedicated { ref memory, .. } => memory,
            ChunkFlavor::General(ref block) => &block.memory,
        }
    }

    fn range(&self) -> Range<Size> {
        match self.flavor {
            ChunkFlavor::Dedicated { ref memory, .. } => 0..memory.size(),
            ChunkFlavor::General(ref block) => block.range.clone(),
        }
    }

    // Get the byte range covered by `count` blocks starting at `block_index`.
    fn blocks_range(&self, block_size: Size, block_index: u32, count: u32) -> Range<Size> {
        let range = self.range();
        let start = range.start + block_size * block_index as Size;
        let end = start + block_size * count as Size;
        debug_assert!(end <= range.end);
        start..end
    }

    fn num_blocks(&self, block_size: Size) -> Size {
        let range = self.range();
        ((range.end - range.start) / block_size).min(MAX_BLOCKS_PER_CHUNK as Size)
    }

    /// Check if all blocks in the chunk are free.
    fn is_unused(&self, block_size: Size) -> bool {
        let blocks = self.num_blocks(block_size);

        let high_bit = 1 << (blocks - 1);
        let mask = (high_bit - 1) | high_bit;

        debug_assert!(self.blocks <= mask);
        self.blocks == mask
    }

    /// Check if there are no free blocks left.
    fn is_exhausted(&self) -> bool {
        self.blocks == 0
    }

    fn acquire_blocks(&mut self, count: u32, block_size: Size, align: Size) -> Option<u32> {
        debug_assert!(count > 0 && count <= MAX_BLOCKS_PER_CHUNK);

        // Holds a bit-array of all positions that start a run of `count` free blocks.
        let mut blocks: BlockMask = !0;
        for i in 0..count {
            blocks &= self.blocks >> i;
        }
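        // For example, with `self.blocks == 0b0111_0110` and `count == 2`, the loop
        // above leaves `blocks == 0b0011_0010`: indices 1, 4 and 5 each start a run
        // of two free blocks.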
        // Find a position in `blocks` that is aligned.
        while blocks != 0 {
            let index = blocks.trailing_zeros();
            blocks ^= 1 << index;

            if (index as Size * block_size) & (align - 1) == 0 {
                let mask = ((1 << count) - 1) << index;
                debug_assert_eq!(self.blocks & mask, mask);
                self.blocks ^= mask;
                log::trace!(
                    "Chunk acquired at {}, mask: 0x{:x} -> 0x{:x}",
                    index,
                    mask,
                    self.blocks
                );
                return Some(index);
            }
        }
        None
    }

    fn release_blocks(&mut self, index: u32, count: u32) {
        debug_assert!(index + count <= MAX_BLOCKS_PER_CHUNK);
        let mask = ((1 << count) - 1) << index;
        debug_assert_eq!(self.blocks & mask, 0);
        self.blocks |= mask;
        log::trace!(
            "Chunk released at {}, mask: 0x{:x} -> 0x{:x}",
            index,
            mask,
            self.blocks
        );
    }

    fn mapping_ptr(&self) -> Option<NonNull<u8>> {
        match self.flavor {
            ChunkFlavor::Dedicated { ptr, .. } => ptr,
            ChunkFlavor::General(ref block) => block.ptr,
        }
    }
}