gpu_allocator/metal/mod.rs

#![deny(clippy::unimplemented, clippy::unwrap_used, clippy::ok_expect)]
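//! Metal (heap-placement) backend: suballocates placement `metal::Heap`s
//! and places buffers, textures and acceleration structures into them.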
use std::{backtrace::Backtrace, sync::Arc};

use log::debug;

use crate::{
    allocator::{self, AllocatorReport, MemoryBlockReport},
    AllocationError, AllocationSizes, AllocatorDebugSettings, MemoryLocation, Result,
};

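/// Maps a requested [`MemoryLocation`] to Metal storage options: GPU-only
/// memory lives in private storage, while anything the CPU may touch uses
/// shared storage.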
fn memory_location_to_metal(location: MemoryLocation) -> metal::MTLResourceOptions {
    match location {
        MemoryLocation::GpuOnly => metal::MTLResourceOptions::StorageModePrivate,
        MemoryLocation::CpuToGpu | MemoryLocation::GpuToCpu | MemoryLocation::Unknown => {
            metal::MTLResourceOptions::StorageModeShared
        }
    }
}

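/// A region of a [`metal::Heap`] reserved by the [`Allocator`]. The
/// allocation holds no Metal resource itself; buffers, textures and
/// acceleration structures are placed into it with the `make_*` methods.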
#[derive(Debug)]
pub struct Allocation {
    chunk_id: Option<std::num::NonZeroU64>,
    offset: u64,
    size: u64,
    memory_block_index: usize,
    memory_type_index: usize,
    heap: Arc<metal::Heap>,
    name: Option<Box<str>>,
}

impl Allocation {
    pub fn heap(&self) -> &metal::Heap {
        self.heap.as_ref()
    }

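    /// Places a buffer spanning this allocation's range in the backing
    /// heap, inheriting the heap's resource options, and labels it with
    /// the allocation's name. Returns `None` if Metal declines the
    /// placement. See the usage sketch on [`Allocator::allocate`].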
    pub fn make_buffer(&self) -> Option<metal::Buffer> {
        let resource =
            self.heap
                .new_buffer_with_offset(self.size, self.heap.resource_options(), self.offset);
        if let Some(resource) = &resource {
            if let Some(name) = &self.name {
                resource.set_label(name);
            }
        }
        resource
    }

    pub fn make_texture(&self, desc: &metal::TextureDescriptor) -> Option<metal::Texture> {
        let resource = self.heap.new_texture_with_offset(desc, self.offset);
        if let Some(resource) = &resource {
            if let Some(name) = &self.name {
                resource.set_label(name);
            }
        }
        resource
    }

    pub fn make_acceleration_structure(&self) -> Option<metal::AccelerationStructure> {
        let resource = self
            .heap
            .new_acceleration_structure_with_size_offset(self.size, self.offset);
        if let Some(resource) = &resource {
            if let Some(name) = &self.name {
                resource.set_label(name);
            }
        }
        resource
    }

    fn is_null(&self) -> bool {
        self.chunk_id.is_none()
    }
}

#[derive(Clone, Debug)]
pub struct AllocationCreateDesc<'a> {
    /// Name of the allocation, for tracking and debugging purposes
    pub name: &'a str,
    /// Location where the memory allocation should be stored
    pub location: MemoryLocation,
    pub size: u64,
    pub alignment: u64,
}

impl<'a> AllocationCreateDesc<'a> {
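    /// Builds a create-desc for a buffer of `length` bytes, letting the
    /// device report the heap size and alignment the placement needs. A
    /// minimal sketch (name, length and location are illustrative):
    ///
    /// ```no_run
    /// # use gpu_allocator::{metal::AllocationCreateDesc, MemoryLocation};
    /// # let device = metal::Device::system_default().expect("no Metal device");
    /// let desc = AllocationCreateDesc::buffer(&device, "vertices", 16 * 1024, MemoryLocation::GpuOnly);
    /// assert!(desc.size >= 16 * 1024);
    /// ```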
    pub fn buffer(
        device: &metal::Device,
        name: &'a str,
        length: u64,
        location: MemoryLocation,
    ) -> Self {
        let size_and_align =
            device.heap_buffer_size_and_align(length, memory_location_to_metal(location));
        Self {
            name,
            location,
            size: size_and_align.size,
            alignment: size_and_align.align,
        }
    }

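    /// Builds a create-desc for a texture described by `desc`, deriving
    /// the memory location from the descriptor's storage mode. A minimal
    /// sketch (dimensions, format and storage mode are illustrative):
    ///
    /// ```no_run
    /// # use gpu_allocator::metal::AllocationCreateDesc;
    /// # let device = metal::Device::system_default().expect("no Metal device");
    /// let texture_desc = metal::TextureDescriptor::new();
    /// texture_desc.set_width(256);
    /// texture_desc.set_height(256);
    /// texture_desc.set_pixel_format(metal::MTLPixelFormat::RGBA8Unorm);
    /// texture_desc.set_storage_mode(metal::MTLStorageMode::Private);
    /// let desc = AllocationCreateDesc::texture(&device, "render target", &texture_desc);
    /// ```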
    pub fn texture(device: &metal::Device, name: &'a str, desc: &metal::TextureDescriptor) -> Self {
        let size_and_align = device.heap_texture_size_and_align(desc);
        Self {
            name,
            location: match desc.storage_mode() {
                metal::MTLStorageMode::Shared
                | metal::MTLStorageMode::Managed
                | metal::MTLStorageMode::Memoryless => MemoryLocation::Unknown,
                metal::MTLStorageMode::Private => MemoryLocation::GpuOnly,
            },
            size: size_and_align.size,
            alignment: size_and_align.align,
        }
    }

    pub fn acceleration_structure_with_size(
        device: &metal::Device,
        name: &'a str,
        size: u64,
        location: MemoryLocation,
    ) -> Self {
        let size_and_align = device.heap_acceleration_structure_size_and_align_with_size(size);
        Self {
            name,
            location,
            size: size_and_align.size,
            alignment: size_and_align.align,
        }
    }
}

pub struct Allocator {
    device: Arc<metal::Device>,
    debug_settings: AllocatorDebugSettings,
    memory_types: Vec<MemoryType>,
    allocation_sizes: AllocationSizes,
}

#[derive(Debug)]
pub struct AllocatorCreateDesc {
    pub device: Arc<metal::Device>,
    pub debug_settings: AllocatorDebugSettings,
    pub allocation_sizes: AllocationSizes,
}

#[derive(Debug)]
pub struct CommittedAllocationStatistics {
    pub num_allocations: usize,
    pub total_size: u64,
}

#[derive(Debug)]
struct MemoryBlock {
    heap: Arc<metal::Heap>,
    size: u64,
    sub_allocator: Box<dyn allocator::SubAllocator>,
}

impl MemoryBlock {
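    /// Creates one placement heap of `size` bytes from the shared
    /// descriptor and pairs it with a sub-allocator: a dedicated
    /// allocator that owns the whole block, or a free-list allocator for
    /// general sub-allocation.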
    fn new(
        device: &Arc<metal::Device>,
        size: u64,
        heap_descriptor: &metal::HeapDescriptor,
        dedicated: bool,
        memory_location: MemoryLocation,
    ) -> Result<Self> {
        heap_descriptor.set_size(size);

        let heap = Arc::new(device.new_heap(heap_descriptor));
        heap.set_label(&format!("MemoryBlock {memory_location:?}"));

        let sub_allocator: Box<dyn allocator::SubAllocator> = if dedicated {
            Box::new(allocator::DedicatedBlockAllocator::new(size))
        } else {
            Box::new(allocator::FreeListAllocator::new(size))
        };

        Ok(Self {
            heap,
            size,
            sub_allocator,
        })
    }
}

#[derive(Debug)]
struct MemoryType {
    memory_blocks: Vec<Option<MemoryBlock>>,
    _committed_allocations: CommittedAllocationStatistics,
    memory_location: MemoryLocation,
    heap_properties: metal::HeapDescriptor,
    memory_type_index: usize,
    active_general_blocks: usize,
}

impl MemoryType {
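    /// Allocation strategy: requests larger than the configured memblock
    /// size get a dedicated block of their own; smaller requests are
    /// offered to existing blocks (newest first) and a fresh free-list
    /// block is only created once every block reports `OutOfMemory`.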
    fn allocate(
        &mut self,
        device: &Arc<metal::Device>,
        desc: &AllocationCreateDesc<'_>,
        backtrace: Arc<Backtrace>,
        allocation_sizes: &AllocationSizes,
    ) -> Result<Allocation> {
        let allocation_type = allocator::AllocationType::Linear;

        let memblock_size = if self.heap_properties.storage_mode() == metal::MTLStorageMode::Private
        {
            allocation_sizes.device_memblock_size
        } else {
            allocation_sizes.host_memblock_size
        };

        let size = desc.size;
        let alignment = desc.alignment;

        // Create a dedicated block for large memory allocations
        if size > memblock_size {
            let mem_block = MemoryBlock::new(
                device,
                size,
                &self.heap_properties,
                true,
                self.memory_location,
            )?;

            let block_index = self.memory_blocks.iter().position(|block| block.is_none());
            let block_index = match block_index {
                Some(i) => {
                    self.memory_blocks[i].replace(mem_block);
                    i
                }
                None => {
                    self.memory_blocks.push(Some(mem_block));
                    self.memory_blocks.len() - 1
                }
            };

            let mem_block = self.memory_blocks[block_index]
                .as_mut()
                .ok_or_else(|| AllocationError::Internal("Memory block must be Some".into()))?;

            let (offset, chunk_id) = mem_block.sub_allocator.allocate(
                size,
                alignment,
                allocation_type,
                1,
                desc.name,
                backtrace,
            )?;

            return Ok(Allocation {
                chunk_id: Some(chunk_id),
                size,
                offset,
                memory_block_index: block_index,
                memory_type_index: self.memory_type_index,
                heap: mem_block.heap.clone(),
                name: Some(desc.name.into()),
            });
        }

        let mut empty_block_index = None;
        for (mem_block_i, mem_block) in self.memory_blocks.iter_mut().enumerate().rev() {
            if let Some(mem_block) = mem_block {
                let allocation = mem_block.sub_allocator.allocate(
                    size,
                    alignment,
                    allocation_type,
                    1,
                    desc.name,
                    backtrace.clone(),
                );

                match allocation {
                    Ok((offset, chunk_id)) => {
                        return Ok(Allocation {
                            chunk_id: Some(chunk_id),
                            offset,
                            size,
                            memory_block_index: mem_block_i,
                            memory_type_index: self.memory_type_index,
                            heap: mem_block.heap.clone(),
                            name: Some(desc.name.into()),
                        });
                    }
                    Err(AllocationError::OutOfMemory) => {} // Block is full, continue search.
                    Err(err) => return Err(err),            // Unhandled error, return.
                }
            } else if empty_block_index.is_none() {
                empty_block_index = Some(mem_block_i);
            }
        }

        let new_memory_block = MemoryBlock::new(
            device,
            memblock_size,
            &self.heap_properties,
            false,
            self.memory_location,
        )?;

        let new_block_index = if let Some(block_index) = empty_block_index {
            self.memory_blocks[block_index] = Some(new_memory_block);
            block_index
        } else {
            self.memory_blocks.push(Some(new_memory_block));
            self.memory_blocks.len() - 1
        };

        self.active_general_blocks += 1;

        let mem_block = self.memory_blocks[new_block_index]
            .as_mut()
            .ok_or_else(|| AllocationError::Internal("Memory block must be Some".into()))?;
        let allocation = mem_block.sub_allocator.allocate(
            size,
            alignment,
            allocation_type,
            1,
            desc.name,
            backtrace,
        );
        let (offset, chunk_id) = match allocation {
            Err(AllocationError::OutOfMemory) => Err(AllocationError::Internal(
                "Allocation that must succeed failed. This is a bug in the allocator.".into(),
            )),
            a => a,
        }?;

        Ok(Allocation {
            chunk_id: Some(chunk_id),
            offset,
            size,
            memory_block_index: new_block_index,
            memory_type_index: self.memory_type_index,
            heap: mem_block.heap.clone(),
            name: Some(desc.name.into()),
        })
    }

    fn free(&mut self, allocation: &Allocation) -> Result<()> {
        let block_idx = allocation.memory_block_index;

        let mem_block = self.memory_blocks[block_idx]
            .as_mut()
            .ok_or_else(|| AllocationError::Internal("Memory block must be Some.".into()))?;

        mem_block.sub_allocator.free(allocation.chunk_id)?;

        if mem_block.sub_allocator.is_empty() {
            if mem_block.sub_allocator.supports_general_allocations() {
                if self.active_general_blocks > 1 {
                    let block = self.memory_blocks[block_idx].take();
                    if block.is_none() {
                        return Err(AllocationError::Internal(
                            "Memory block must be Some.".into(),
                        ));
                    }
                    // Note that `block` will be destroyed on `drop` here

                    self.active_general_blocks -= 1;
                }
            } else {
                let block = self.memory_blocks[block_idx].take();
                if block.is_none() {
                    return Err(AllocationError::Internal(
                        "Memory block must be Some.".into(),
                    ));
                }
                // Note that `block` will be destroyed on `drop` here
            }
        }

        Ok(())
    }
}

impl Allocator {
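    /// Creates an allocator with one placement-heap template per memory
    /// location: private storage for `GpuOnly`, write-combined shared
    /// storage for `CpuToGpu`, and default-cached shared storage for
    /// `GpuToCpu`. A minimal construction sketch; acquiring the device
    /// through `metal::Device::system_default()` is illustrative only:
    ///
    /// ```no_run
    /// use std::sync::Arc;
    /// use gpu_allocator::metal::{Allocator, AllocatorCreateDesc};
    ///
    /// # fn main() -> gpu_allocator::Result<()> {
    /// let device = Arc::new(metal::Device::system_default().expect("no Metal device"));
    /// let mut allocator = Allocator::new(&AllocatorCreateDesc {
    ///     device: device.clone(),
    ///     debug_settings: Default::default(),
    ///     allocation_sizes: Default::default(),
    /// })?;
    /// # Ok(()) }
    /// ```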
    pub fn new(desc: &AllocatorCreateDesc) -> Result<Self> {
        let heap_types = [
            (MemoryLocation::GpuOnly, {
                let heap_desc = metal::HeapDescriptor::new();
                heap_desc.set_cpu_cache_mode(metal::MTLCPUCacheMode::DefaultCache);
                heap_desc.set_storage_mode(metal::MTLStorageMode::Private);
                heap_desc.set_heap_type(metal::MTLHeapType::Placement);
                heap_desc
            }),
            (MemoryLocation::CpuToGpu, {
                let heap_desc = metal::HeapDescriptor::new();
                heap_desc.set_cpu_cache_mode(metal::MTLCPUCacheMode::WriteCombined);
                heap_desc.set_storage_mode(metal::MTLStorageMode::Shared);
                heap_desc.set_heap_type(metal::MTLHeapType::Placement);
                heap_desc
            }),
            (MemoryLocation::GpuToCpu, {
                let heap_desc = metal::HeapDescriptor::new();
                heap_desc.set_cpu_cache_mode(metal::MTLCPUCacheMode::DefaultCache);
                heap_desc.set_storage_mode(metal::MTLStorageMode::Shared);
                heap_desc.set_heap_type(metal::MTLHeapType::Placement);
                heap_desc
            }),
        ];

        let memory_types = heap_types
            .into_iter()
            .enumerate()
            .map(|(i, (memory_location, heap_descriptor))| MemoryType {
                memory_blocks: vec![],
                _committed_allocations: CommittedAllocationStatistics {
                    num_allocations: 0,
                    total_size: 0,
                },
                memory_location,
                heap_properties: heap_descriptor,
                memory_type_index: i,
                active_general_blocks: 0,
            })
            .collect();

        Ok(Self {
            device: desc.device.clone(),
            debug_settings: desc.debug_settings,
            memory_types,
            allocation_sizes: desc.allocation_sizes,
        })
    }

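    /// Picks the memory type whose location matches `desc.location`
    /// (`Unknown` matches the first type) and sub-allocates from it,
    /// rejecting zero sizes and non-power-of-two alignments. A minimal
    /// sketch, reusing the setup from [`Allocator::new`]'s example:
    ///
    /// ```no_run
    /// # use std::sync::Arc;
    /// # use gpu_allocator::metal::{AllocationCreateDesc, Allocator, AllocatorCreateDesc};
    /// # use gpu_allocator::MemoryLocation;
    /// # fn main() -> gpu_allocator::Result<()> {
    /// # let device = Arc::new(metal::Device::system_default().expect("no Metal device"));
    /// # let mut allocator = Allocator::new(&AllocatorCreateDesc {
    /// #     device: device.clone(),
    /// #     debug_settings: Default::default(),
    /// #     allocation_sizes: Default::default(),
    /// # })?;
    /// let desc = AllocationCreateDesc::buffer(&device, "staging", 64 * 1024, MemoryLocation::CpuToGpu);
    /// let allocation = allocator.allocate(&desc)?;
    /// let buffer = allocation.make_buffer().expect("buffer placement");
    /// # Ok(()) }
    /// ```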
    pub fn allocate(&mut self, desc: &AllocationCreateDesc<'_>) -> Result<Allocation> {
        let size = desc.size;
        let alignment = desc.alignment;

        let backtrace = Arc::new(if self.debug_settings.store_stack_traces {
            Backtrace::force_capture()
        } else {
            Backtrace::disabled()
        });

        if self.debug_settings.log_allocations {
            debug!(
                "Allocating `{}` of {} bytes with an alignment of {}.",
                &desc.name, size, alignment
            );
            if self.debug_settings.log_stack_traces {
                let backtrace = Backtrace::force_capture();
                debug!("Allocation stack trace: {}", backtrace);
            }
        }

        if size == 0 || !alignment.is_power_of_two() {
            return Err(AllocationError::InvalidAllocationCreateDesc);
        }

        // Find memory type
        let memory_type = self
            .memory_types
            .iter_mut()
            .find(|memory_type| {
                // Is location compatible
                desc.location == MemoryLocation::Unknown
                    || desc.location == memory_type.memory_location
            })
            .ok_or(AllocationError::NoCompatibleMemoryTypeFound)?;

        memory_type.allocate(&self.device, desc, backtrace, &self.allocation_sizes)
    }

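    /// Returns the allocation's range to its block's sub-allocator and
    /// drops a block's heap once it empties (dedicated blocks
    /// immediately, general blocks down to the last one). A minimal
    /// sketch, reusing the setup from the examples above:
    ///
    /// ```no_run
    /// # use std::sync::Arc;
    /// # use gpu_allocator::metal::{AllocationCreateDesc, Allocator, AllocatorCreateDesc};
    /// # use gpu_allocator::MemoryLocation;
    /// # fn main() -> gpu_allocator::Result<()> {
    /// # let device = Arc::new(metal::Device::system_default().expect("no Metal device"));
    /// # let mut allocator = Allocator::new(&AllocatorCreateDesc {
    /// #     device: device.clone(),
    /// #     debug_settings: Default::default(),
    /// #     allocation_sizes: Default::default(),
    /// # })?;
    /// # let allocation = allocator.allocate(&AllocationCreateDesc::buffer(
    /// #     &device, "scratch", 1024, MemoryLocation::GpuOnly,
    /// # ))?;
    /// // Only free once the GPU has finished with resources placed in it.
    /// allocator.free(&allocation)?;
    /// # Ok(()) }
    /// ```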
    pub fn free(&mut self, allocation: &Allocation) -> Result<()> {
        if self.debug_settings.log_frees {
            let name = allocation.name.as_deref().unwrap_or("<null>");
            debug!("Freeing `{}`.", name);
            if self.debug_settings.log_stack_traces {
                let backtrace = Backtrace::force_capture();
                debug!("Free stack trace: {}", backtrace);
            }
        }

        if allocation.is_null() {
            return Ok(());
        }
        self.memory_types[allocation.memory_type_index].free(allocation)?;
        Ok(())
    }

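    /// Returns a reference to every heap currently backing a memory
    /// block, e.g. to pass to a command encoder's `use_heap` for
    /// residency (a usage suggestion; this module itself only creates and
    /// drops the heaps).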
    pub fn get_heaps(&self) -> Vec<&metal::HeapRef> {
        // Get all memory blocks
        let mut heaps: Vec<&metal::HeapRef> = Vec::new();
        for memory_type in &self.memory_types {
            for block in memory_type.memory_blocks.iter().flatten() {
                heaps.push(block.heap.as_ref());
            }
        }
        heaps
    }

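    /// Snapshots every block and live sub-allocation for debugging. The
    /// report totals can be logged directly; a minimal sketch, assuming
    /// an `allocator` built as in [`Allocator::new`]'s example:
    ///
    /// ```no_run
    /// # use std::sync::Arc;
    /// # use gpu_allocator::metal::{Allocator, AllocatorCreateDesc};
    /// # fn main() -> gpu_allocator::Result<()> {
    /// # let device = Arc::new(metal::Device::system_default().expect("no Metal device"));
    /// # let allocator = Allocator::new(&AllocatorCreateDesc {
    /// #     device: device.clone(),
    /// #     debug_settings: Default::default(),
    /// #     allocation_sizes: Default::default(),
    /// # })?;
    /// let report = allocator.generate_report();
    /// println!(
    ///     "{} bytes allocated of {} reserved across {} blocks",
    ///     report.total_allocated_bytes,
    ///     report.total_reserved_bytes,
    ///     report.blocks.len(),
    /// );
    /// # Ok(()) }
    /// ```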
    pub fn generate_report(&self) -> AllocatorReport {
        let mut allocations = vec![];
        let mut blocks = vec![];
        let mut total_reserved_bytes = 0;

        for memory_type in &self.memory_types {
            for block in memory_type.memory_blocks.iter().flatten() {
                total_reserved_bytes += block.size;
                let first_allocation = allocations.len();
                allocations.extend(block.sub_allocator.report_allocations());
                blocks.push(MemoryBlockReport {
                    size: block.size,
                    allocations: first_allocation..allocations.len(),
                });
            }
        }

        let total_allocated_bytes = allocations.iter().map(|report| report.size).sum();

        AllocatorReport {
            allocations,
            blocks,
            total_allocated_bytes,
            total_reserved_bytes,
        }
    }
}