cubecl_runtime/memory_management/
mod.rs

1pub(crate) mod memory_pool;
2
3mod base;
4mod memory_lock;
5
6pub use base::*;
7pub use memory_lock::*;
8
9/// Dynamic memory management strategy.
10mod memory_manage;
11pub use memory_manage::*;
12
13#[cfg(not(feature = "std"))]
14use alloc::vec::Vec;
15
/// Selects how a memory pool hands out its allocations.
#[derive(Clone, Debug)]
pub enum PoolType {
    /// Every allocation is placed on its own, separate page.
    ExclusivePages,
    /// Every allocation is a slice taken out of a bigger allocation.
    SlicedPages {
        /// Upper bound on the size of a slice that may be allocated in the pool.
        max_slice_size: u64,
    },
}
27
28/// Options to create a memory pool.
29#[derive(Debug, Clone)]
30pub struct MemoryPoolOptions {
31    /// What kind of pool to use.
32    pub pool_type: PoolType,
33    /// The amount of bytes used for each chunk in the memory pool.
34    pub page_size: u64,
35    /// The number of chunks allocated directly at creation.
36    ///
37    /// Useful when you know in advance how much memory you'll need.
38    pub chunk_num_prealloc: u64,
39    /// Period after which allocations are deemed unused and deallocated.
40    ///
41    /// This period is measured in the number of allocations in the parent allocator. If a page
42    /// in the pool was unused for the entire period, it will be deallocated. This period is
43    /// approximmate, as checks are only done occasionally.
44    pub dealloc_period: Option<u64>,
45}
46
47/// High level configuration of memory management.
48#[derive(Clone, Debug)]
49pub enum MemoryConfiguration {
50    /// The default preset using sub sices.
51    #[cfg(not(exclusive_memory_only))]
52    SubSlices,
53    /// Default preset using only exclusive pages.
54    /// This can be necessary when backends don't support sub-slices.
55    ExclusivePages,
56    /// Customize each pool individually.
57    Custom(Vec<MemoryPoolOptions>),
58}
59
60#[allow(clippy::derivable_impls)]
61impl Default for MemoryConfiguration {
62    fn default() -> Self {
63        #[cfg(exclusive_memory_only)]
64        {
65            MemoryConfiguration::ExclusivePages
66        }
67        #[cfg(not(exclusive_memory_only))]
68        {
69            MemoryConfiguration::SubSlices
70        }
71    }
72}
73
/// Allocation-related properties of the device.
#[derive(Clone, Debug)]
pub struct MemoryDeviceProperties {
    /// Largest number of bytes that can be allocated in one go.
    pub max_page_size: u64,
    /// Alignment, in bytes, that memory offsets are required to have.
    pub alignment: u64,
}
82
/// Accelerator-hardware properties of the device.
///
/// # Plane size min/max
///
/// `plane_size_min` and `plane_size_max` describe the range of values the plane size can take.
///
/// - Nvidia GPUs and HIP: the range collapses to a single fixed value.
/// - wgpu on AMD GPUs: a real range; the value actually configured is undefined and can only
///   be queried at runtime. It should usually be 32, but this is not guaranteed.
/// - Intel GPUs: the value varies with the number of registers used by the kernel, and there
///   is currently no way to query it at compile time, so the minimum value should usually be
///   assumed.
#[derive(Clone, Debug)]
pub struct HardwareProperties {
    /// Smallest plane size possible on this device.
    pub plane_size_min: u32,
    /// Largest plane size possible on this device.
    pub plane_size_max: u32,
    /// Maximum number of bindings that a kernel can use at once.
    pub max_bindings: u32,
}
106
107impl HardwareProperties {
108    /// Plane size that is defined for the device.
109    pub fn defined_plane_size(&self) -> Option<u32> {
110        #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
111        if self.plane_size_min == self.plane_size_max {
112            Some(self.plane_size_min)
113        } else {
114            None
115        }
116
117        // On Apple Silicon, the plane size is 32,
118        // though the minimum and maximum differ.
119        // https://github.com/gpuweb/gpuweb/issues/3950
120        #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
121        {
122            Some(32)
123        }
124    }
125}