cubecl_runtime/memory_management/mod.rs
pub(crate) mod memory_pool;

mod base;

pub use base::*;

/// Dynamic memory management strategy.
mod memory_manage;
pub use memory_manage::*;

#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

use crate::server::{CubeCount, CubeDim};

/// The type of memory pool to use.
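///
/// # Example
///
/// A minimal sketch of picking a pool type, assuming this module is exported
/// as `cubecl_runtime::memory_management`. The byte sizes are illustrative,
/// not defaults.
///
/// ```
/// use cubecl_runtime::memory_management::PoolType;
///
/// // Every allocation of up to 1 MiB gets its own dedicated page.
/// let exclusive = PoolType::ExclusivePages {
///     max_alloc_size: 1024 * 1024,
/// };
///
/// // Slices of up to 16 MiB are carved out of shared 64 MiB pages.
/// let sliced = PoolType::SlicedPages {
///     page_size: 64 * 1024 * 1024,
///     max_slice_size: 16 * 1024 * 1024,
/// };
/// ```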
#[derive(Debug, Clone)]
pub enum PoolType {
    /// Use a memory pool where every allocation is a separate page.
    ExclusivePages {
        /// The maximum size, in bytes, of an allocation in this pool.
        max_alloc_size: u64,
    },
    /// Use a memory pool where each allocation is a slice of a bigger allocation.
    SlicedPages {
        /// The size of the pages to allocate.
        page_size: u64,
        /// The maximum size of a slice to allocate in the pool.
        max_slice_size: u64,
    },
}

/// Options to create a memory pool.
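///
/// # Example
///
/// A sketch of a sliced pool whose idle pages are eventually reclaimed. The
/// values are illustrative; note that `dealloc_period` counts allocations in
/// the parent allocator, not wall-clock time.
///
/// ```
/// use cubecl_runtime::memory_management::{MemoryPoolOptions, PoolType};
///
/// let options = MemoryPoolOptions {
///     pool_type: PoolType::SlicedPages {
///         page_size: 64 * 1024 * 1024,
///         max_slice_size: 16 * 1024 * 1024,
///     },
///     // Pages untouched for ~1000 parent allocations may be deallocated.
///     dealloc_period: Some(1000),
/// };
/// ```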
#[derive(Debug, Clone)]
pub struct MemoryPoolOptions {
    /// What kind of pool to use.
    pub pool_type: PoolType,
    /// Period after which allocations are deemed unused and deallocated.
    ///
    /// This period is measured in the number of allocations in the parent allocator. If a page
    /// in the pool was unused for the entire period, it will be deallocated. This period is
    /// approximate, as checks are only done occasionally.
    pub dealloc_period: Option<u64>,
}

/// High level configuration of memory management.
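///
/// # Example
///
/// A sketch of a custom configuration with two pools. Since the first
/// matching pool is picked for an allocation, the small-allocation pool is
/// listed first. All sizes and periods are illustrative assumptions.
///
/// ```
/// use cubecl_runtime::memory_management::{
///     MemoryConfiguration, MemoryPoolOptions, PoolType,
/// };
///
/// let config = MemoryConfiguration::Custom {
///     pool_options: vec![
///         // Small allocations share sliced pages.
///         MemoryPoolOptions {
///             pool_type: PoolType::SlicedPages {
///                 page_size: 16 * 1024 * 1024,
///                 max_slice_size: 4 * 1024 * 1024,
///             },
///             dealloc_period: None,
///         },
///         // Everything else gets an exclusive page, reclaimed when idle.
///         MemoryPoolOptions {
///             pool_type: PoolType::ExclusivePages {
///                 max_alloc_size: u64::MAX,
///             },
///             dealloc_period: Some(1000),
///         },
///     ],
/// };
/// ```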
#[derive(Clone, Debug)]
pub enum MemoryConfiguration {
    /// The default preset, which uses pools that allocate sub-slices.
    #[cfg(not(exclusive_memory_only))]
    SubSlices,
    /// Default preset for using exclusive pages.
    /// This can be necessary for backends that don't support sub-slices.
    ExclusivePages,
    /// Custom settings.
    Custom {
        /// Options for each pool to construct. When allocating, the first
        /// possible pool will be picked for an allocation.
        pool_options: Vec<MemoryPoolOptions>,
    },
}

#[allow(clippy::derivable_impls)]
impl Default for MemoryConfiguration {
    fn default() -> Self {
        #[cfg(exclusive_memory_only)]
        {
            MemoryConfiguration::ExclusivePages
        }
        #[cfg(not(exclusive_memory_only))]
        {
            MemoryConfiguration::SubSlices
        }
    }
}

/// Properties of the device related to allocation.
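///
/// # Example
///
/// A sketch of rounding an allocation size up to the device alignment. The
/// property values are hypothetical.
///
/// ```
/// use cubecl_runtime::memory_management::MemoryDeviceProperties;
///
/// let props = MemoryDeviceProperties {
///     max_page_size: 256 * 1024 * 1024,
///     alignment: 256,
/// };
///
/// // 1000 bytes rounds up to the next multiple of 256, i.e. 1024.
/// let size: u64 = 1000;
/// assert_eq!(size.next_multiple_of(props.alignment), 1024);
/// ```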
#[derive(Debug, Clone)]
pub struct MemoryDeviceProperties {
    /// The maximum number of bytes that can be allocated in one go.
    pub max_page_size: u64,
    /// The required memory offset alignment in bytes.
    pub alignment: u64,
}

/// Properties of the device related to the accelerator hardware.
///
/// # Plane size min/max
///
/// This is the range of possible values for the plane size.
///
/// For Nvidia GPUs and HIP, this is a single fixed value.
///
/// For wgpu with AMD GPUs, this is a range of possible values, but the actual configured value
/// is undefined and can only be queried at runtime. It should usually be 32, but this is not
/// guaranteed.
///
/// For Intel GPUs, this varies with the number of registers used in the kernel, and there is
/// currently no way to query it at compile time. As a result, the minimum value should usually
/// be assumed.
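///
/// # Example
///
/// Following the guidance above, a hypothetical helper (not part of this
/// crate) that picks the only plane size that is always safe to assume:
///
/// ```
/// use cubecl_runtime::memory_management::HardwareProperties;
///
/// fn assumed_plane_size(props: &HardwareProperties) -> u32 {
///     // Exact on CUDA/HIP (min == max); the conservative choice elsewhere.
///     props.plane_size_min
/// }
/// ```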
#[derive(Debug, Clone)]
pub struct HardwareProperties {
    /// The maximum size of a single load instruction, in bits. Used for optimized line sizes.
    pub load_width: u32,
    /// The minimum size of a plane on this device.
    pub plane_size_min: u32,
    /// The maximum size of a plane on this device.
    pub plane_size_max: u32,
    /// Maximum number of bindings that can be used by a kernel at once.
    pub max_bindings: u32,
    /// Maximum amount of shared memory, in bytes.
    pub max_shared_memory_size: usize,
    /// Maximum `CubeCount` in the x, y, and z dimensions.
    pub max_cube_count: CubeCount,
    /// Maximum total number of units in a cube.
    pub max_units_per_cube: u32,
    /// Maximum `CubeDim` in the x, y, and z dimensions.
    pub max_cube_dim: CubeDim,
    /// Number of streaming multiprocessors (SMs), if available.
    pub num_streaming_multiprocessors: Option<u32>,
    /// Number of available parallel CPU units, if the runtime runs on a CPU.
    pub num_cpu_cores: Option<u32>,
    /// Number of tensor cores per SM, if any.
    pub num_tensor_cores: Option<u32>,
    /// The minimum tiling dimension for a single axis in tensor cores.
    ///
    /// For a backend that only supports 16x16x16, the value would be 16.
    /// For a backend that also supports 32x8x16, the value would be 8.
    pub min_tensor_cores_dim: Option<u32>,
}