cubecl_runtime/memory_management/mod.rs
1pub(crate) mod memory_pool;
2
3mod base;
4mod memory_lock;
5
6pub use base::*;
7pub use memory_lock::*;
8
9/// Dynamic memory management strategy.
10mod memory_manage;
11pub use memory_manage::*;
12
13#[cfg(not(feature = "std"))]
14use alloc::vec::Vec;
15
/// Selects how a memory pool lays out its allocations.
#[derive(Clone, Debug)]
pub enum PoolType {
    /// Every allocation gets a page of its own.
    ExclusivePages,
    /// Every allocation is carved out as a slice of a bigger allocation.
    SlicedPages {
        /// Upper bound on the size of a slice that may be allocated in the pool.
        max_slice_size: u64,
    },
}
27
28/// Options to create a memory pool.
29#[derive(Debug, Clone)]
30pub struct MemoryPoolOptions {
31 /// What kind of pool to use.
32 pub pool_type: PoolType,
33 /// The amount of bytes used for each chunk in the memory pool.
34 pub page_size: u64,
35 /// The number of chunks allocated directly at creation.
36 ///
37 /// Useful when you know in advance how much memory you'll need.
38 pub chunk_num_prealloc: u64,
39 /// Period after which allocations are deemed unused and deallocated.
40 ///
41 /// This period is measured in the number of allocations in the parent allocator. If a page
42 /// in the pool was unused for the entire period, it will be deallocated. This period is
43 /// approximmate, as checks are only done occasionally.
44 pub dealloc_period: Option<u64>,
45}
46
47/// High level configuration of memory management.
48#[derive(Clone, Debug)]
49pub enum MemoryConfiguration {
50 /// The default preset using sub sices.
51 #[cfg(not(exclusive_memory_only))]
52 SubSlices,
53 /// Default preset using only exclusive pages.
54 /// This can be necessary when backends don't support sub-slices.
55 ExclusivePages,
56 /// Customize each pool individually.
57 Custom(Vec<MemoryPoolOptions>),
58}
59
60#[allow(clippy::derivable_impls)]
61impl Default for MemoryConfiguration {
62 fn default() -> Self {
63 #[cfg(exclusive_memory_only)]
64 {
65 MemoryConfiguration::ExclusivePages
66 }
67 #[cfg(not(exclusive_memory_only))]
68 {
69 MemoryConfiguration::SubSlices
70 }
71 }
72}
73
/// Allocation-related properties of a device.
#[derive(Clone, Debug)]
pub struct MemoryDeviceProperties {
    /// Largest number of bytes that a single allocation may request.
    pub max_page_size: u64,
    /// Alignment, in bytes, that memory offsets are required to respect.
    pub alignment: u64,
}
82
/// Accelerator-hardware properties of a device.
///
/// # Plane size min/max
///
/// The two plane size fields describe the range of values the plane size can take.
///
/// - Nvidia GPUs and HIP: the range collapses to a single fixed value.
/// - wgpu on AMD GPUs: a real range; the value actually configured is undefined and can
///   only be queried at runtime. It should usually be 32, but that is not guaranteed.
/// - Intel GPUs: the value varies with the number of registers the kernel uses, and there
///   is currently no way to query it at compile time. The minimum value should therefore
///   usually be assumed.
#[derive(Clone, Debug)]
pub struct HardwareProperties {
    /// Smallest plane size this device can use.
    pub plane_size_min: u32,
    /// Largest plane size this device can use.
    pub plane_size_max: u32,
    /// Limit on the number of bindings a single kernel can use at once.
    pub max_bindings: u32,
}
106
107impl HardwareProperties {
108 /// Plane size that is defined for the device.
109 pub fn defined_plane_size(&self) -> Option<u32> {
110 #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
111 if self.plane_size_min == self.plane_size_max {
112 Some(self.plane_size_min)
113 } else {
114 None
115 }
116
117 // On Apple Silicon, the plane size is 32,
118 // though the minimum and maximum differ.
119 // https://github.com/gpuweb/gpuweb/issues/3950
120 #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
121 {
122 Some(32)
123 }
124 }
125}