cubecl_runtime/memory_management/mod.rs
pub(crate) mod memory_pool;

mod base;

pub use base::*;

/// Dynamic memory management strategy.
mod memory_manage;
use cubecl_common::CubeDim;
pub use memory_manage::*;

#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

use crate::server::CubeCount;

/// The type of memory pool to use.
#[derive(Debug, Clone)]
pub enum PoolType {
    /// Use a memory pool where every allocation is a separate page.
    ExclusivePages {
        /// The maximum number of bytes that can be allocated in this pool.
        max_alloc_size: u64,
    },
    /// Use a memory pool where each allocation is a slice of a bigger allocation.
    SlicedPages {
        /// The page size to allocate.
        page_size: u64,
        /// The maximum size of a slice to allocate in the pool.
        max_slice_size: u64,
    },
}

/// Options to create a memory pool.
#[derive(Debug, Clone)]
pub struct MemoryPoolOptions {
    /// What kind of pool to use.
    pub pool_type: PoolType,
    /// Period after which allocations are deemed unused and deallocated.
    ///
    /// This period is measured in the number of allocations in the parent allocator. If a page
    /// in the pool has been unused for the entire period, it will be deallocated. This period is
    /// approximate, as checks are only done occasionally.
    pub dealloc_period: Option<u64>,
}

/// High level configuration of memory management.
#[derive(Clone, Debug)]
pub enum MemoryConfiguration {
    /// The default preset, which uses pools that allocate sub-slices.
    #[cfg(not(exclusive_memory_only))]
    SubSlices,
    /// Default preset for using exclusive pages.
    /// This can be necessary for backends that don't support sub-slices.
    ExclusivePages,
    /// Custom settings.
    Custom {
        /// Options for each pool to construct. When allocating, the first
        /// pool that can fit the allocation will be picked.
        pool_options: Vec<MemoryPoolOptions>,
    },
}
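// A minimal usage sketch (illustration only, not part of this module's API): it shows how the
// types above could be combined into a custom configuration. The function name, the pool sizes,
// and the deallocation period are hypothetical values chosen for the example, not recommended
// defaults.
#[allow(dead_code)]
fn example_custom_configuration() -> MemoryConfiguration {
    let mut pool_options = Vec::new();
    // Small and medium allocations are served as slices of larger pages.
    pool_options.push(MemoryPoolOptions {
        pool_type: PoolType::SlicedPages {
            page_size: 64 * 1024 * 1024,
            max_slice_size: 16 * 1024 * 1024,
        },
        // Pages unused for roughly 1000 allocations in the parent allocator
        // become candidates for deallocation.
        dealloc_period: Some(1000),
    });
    // Larger allocations fall through to an exclusive pool; pools are tried
    // in order, so this one acts as the catch-all.
    pool_options.push(MemoryPoolOptions {
        pool_type: PoolType::ExclusivePages {
            max_alloc_size: u64::MAX,
        },
        dealloc_period: None,
    });
    MemoryConfiguration::Custom { pool_options }
}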

#[allow(clippy::derivable_impls)]
impl Default for MemoryConfiguration {
    fn default() -> Self {
        #[cfg(exclusive_memory_only)]
        {
            MemoryConfiguration::ExclusivePages
        }
        #[cfg(not(exclusive_memory_only))]
        {
            MemoryConfiguration::SubSlices
        }
    }
}

/// Properties of the device related to allocation.
#[derive(Debug, Clone)]
pub struct MemoryDeviceProperties {
    /// The maximum number of bytes that can be allocated in one go.
    pub max_page_size: u64,
    /// The required memory offset alignment in bytes.
    pub alignment: u64,
}
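// A minimal sketch (not part of this module): rounds a requested size up to the device's required
// offset alignment from `MemoryDeviceProperties`. The helper name is hypothetical; the formula
// works for any non-zero alignment and assumes the sum does not overflow.
#[allow(dead_code)]
fn align_up(size: u64, props: &MemoryDeviceProperties) -> u64 {
    // Equivalent to rounding `size` up to the next multiple of `alignment`.
    (size + props.alignment - 1) / props.alignment * props.alignment
}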

/// Properties of the device related to the accelerator hardware.
///
/// # Plane size min/max
///
/// This is the range of possible values for the plane size.
///
/// For Nvidia GPUs and HIP, this is a single fixed value.
///
/// For wgpu with AMD GPUs, this is a range of possible values, but the actual configured value
/// is undefined and can only be queried at runtime. It is usually 32, but this is not guaranteed.
///
/// For Intel GPUs, the plane size varies based on the number of registers used in the kernel. There
/// is currently no way to query this at compile time. As a result, the minimum value should usually
/// be assumed.
#[derive(Debug, Clone)]
pub struct HardwareProperties {
    /// The minimum size of a plane on this device.
    pub plane_size_min: u32,
    /// The maximum size of a plane on this device.
    pub plane_size_max: u32,
    /// The maximum number of bindings that can be used by a kernel at once.
    pub max_bindings: u32,
    /// Maximum amount of shared memory, in bytes.
    pub max_shared_memory_size: usize,
    /// Maximum `CubeCount` in the x, y, and z dimensions.
    pub max_cube_count: CubeCount,
    /// Maximum total number of units in a cube.
    pub max_units_per_cube: u32,
    /// Maximum `CubeDim` in the x, y, and z dimensions.
    pub max_cube_dim: CubeDim,
    /// Number of streaming multiprocessors (SMs), if available.
    pub num_streaming_multiprocessors: Option<u32>,
    /// Number of tensor cores per SM, if any.
    pub num_tensor_cores: Option<u32>,
    /// The minimum tiling dimension for a single axis in tensor cores.
    ///
    /// For a backend that only supports 16x16x16, the value would be 16.
    /// For a backend that also supports 32x8x16, the value would be 8.
    pub min_tensor_cores_dim: Option<u32>,
}
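
// A minimal sketch (not part of this module): per the plane size notes on `HardwareProperties`,
// the plane size is only statically known when the reported range collapses to a single value;
// otherwise a conservative caller would fall back to `plane_size_min`. The function name is
// hypothetical.
#[allow(dead_code)]
fn fixed_plane_size(props: &HardwareProperties) -> Option<u32> {
    (props.plane_size_min == props.plane_size_max).then_some(props.plane_size_min)
}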
126}