cubecl_runtime/memory_management/mod.rs

pub(crate) mod memory_pool;

mod base;

pub use base::*;

/// Dynamic memory management strategy.
mod memory_manage;
pub use memory_manage::*;

#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

use crate::server::CubeCount;
use cubecl_common::CubeDim;

/// The type of memory pool to use.
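///
/// # Example
///
/// A sketch of constructing each variant; the sizes are illustrative, not
/// tuned defaults:
///
/// ```ignore
/// // One page per allocation, for allocations of up to 1 GiB.
/// let exclusive = PoolType::ExclusivePages {
///     max_alloc_size: 1 << 30,
/// };
/// // Slices of up to 64 MiB carved out of shared 128 MiB pages.
/// let sliced = PoolType::SlicedPages {
///     page_size: 128 * 1024 * 1024,
///     max_slice_size: 64 * 1024 * 1024,
/// };
/// ```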
#[derive(Debug, Clone)]
pub enum PoolType {
    /// Use a pool where every allocation is a separate page.
    ExclusivePages {
        /// The maximum number of bytes that can be allocated in this pool.
        max_alloc_size: u64,
    },
    /// Use a pool where each allocation is a slice of a bigger allocation.
    SlicedPages {
        /// The size of each page to allocate, in bytes.
        page_size: u64,
        /// The maximum size of a slice to allocate in the pool.
        max_slice_size: u64,
    },
}

/// Options to create a memory pool.
#[derive(Debug, Clone)]
pub struct MemoryPoolOptions {
    /// What kind of pool to use.
    pub pool_type: PoolType,
    /// Period after which allocations are deemed unused and deallocated.
    ///
    /// This period is measured in the number of allocations in the parent allocator. If a page
    /// in the pool was unused for the entire period, it will be deallocated. This period is
    /// approximate, as checks are only done occasionally.
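    ///
    /// For example, with `dealloc_period: Some(10_000)`, a page that has gone unused for roughly
    /// 10,000 parent allocations becomes eligible for deallocation; the count here is illustrative
    /// and only approximate, since checks run occasionally.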
    pub dealloc_period: Option<u64>,
}

/// High-level configuration of memory management.
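///
/// # Example
///
/// A sketch of a custom configuration with a single sliced pool; the pool
/// sizes and deallocation period are illustrative assumptions, not tuned
/// defaults:
///
/// ```ignore
/// let config = MemoryConfiguration::Custom {
///     pool_options: vec![MemoryPoolOptions {
///         pool_type: PoolType::SlicedPages {
///             page_size: 128 * 1024 * 1024,     // 128 MiB pages
///             max_slice_size: 64 * 1024 * 1024, // slices of up to 64 MiB
///         },
///         dealloc_period: Some(10_000),
///     }],
/// };
/// ```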
#[derive(Clone, Debug)]
pub enum MemoryConfiguration {
    /// The default preset, which uses pools that allocate sub-slices.
    #[cfg(not(exclusive_memory_only))]
    SubSlices,
    /// Default preset for using exclusive pages.
    /// This can be necessary for backends that don't support sub-slices.
    ExclusivePages,
    /// Custom settings.
    Custom {
        /// Options for each pool to construct. When allocating, the first
        /// pool that can fit the allocation will be picked.
        pool_options: Vec<MemoryPoolOptions>,
    },
}

#[allow(clippy::derivable_impls)]
impl Default for MemoryConfiguration {
    fn default() -> Self {
        #[cfg(exclusive_memory_only)]
        {
            MemoryConfiguration::ExclusivePages
        }
        #[cfg(not(exclusive_memory_only))]
        {
            MemoryConfiguration::SubSlices
        }
    }
}

/// Properties of the device related to allocation.
#[derive(Debug, Clone)]
pub struct MemoryDeviceProperties {
    /// The maximum number of bytes that can be allocated in a single allocation.
    pub max_page_size: u64,
    /// The required memory offset alignment in bytes.
    pub alignment: u64,
}

/// Properties of the device related to the accelerator hardware.
///
/// # Plane size min/max
///
/// This is the range of possible values for the plane size.
///
/// For Nvidia GPUs and HIP, this is a single fixed value.
///
/// For wgpu with AMD GPUs, this is a range of possible values, but the actual configured value
/// is undefined and can only be queried at runtime. It is usually 32, but this is not guaranteed.
///
/// For Intel GPUs, this varies based on the number of registers used in the kernel, and there is
/// currently no way to query it at compile time. As a result, the minimum value should usually
/// be assumed.
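///
/// A sketch of how these bounds might be consumed (the `props` binding is a hypothetical
/// `HardwareProperties` value):
///
/// ```ignore
/// // Portable kernels can only rely on the lower bound.
/// let safe_plane_size = props.plane_size_min;
/// // The plane size is statically known only when the bounds coincide.
/// let is_fixed = props.plane_size_min == props.plane_size_max;
/// ```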
#[derive(Debug, Clone)]
pub struct HardwareProperties {
    /// The minimum size of a plane on this device.
    pub plane_size_min: u32,
    /// The maximum size of a plane on this device.
    pub plane_size_max: u32,
    /// Maximum number of bindings that can be used in a kernel at once.
    pub max_bindings: u32,
    /// Maximum amount of shared memory, in bytes.
    pub max_shared_memory_size: usize,
    /// Maximum `CubeCount` in the x, y, and z dimensions.
    pub max_cube_count: CubeCount,
    /// Maximum total number of units in a cube.
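    ///
    /// This bounds the product of the `CubeDim` axes: with an illustrative limit of 1024 units,
    /// a `CubeDim` of (32, 32, 1) is valid, but (32, 32, 2) is not, even when each axis fits
    /// within `max_cube_dim` on its own.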
    pub max_units_per_cube: u32,
    /// Maximum `CubeDim` in the x, y, and z dimensions.
    pub max_cube_dim: CubeDim,
    /// Number of streaming multiprocessors (SMs), if available.
    pub num_streaming_multiprocessors: Option<u32>,
    /// Number of tensor cores per SM, if any.
    pub num_tensor_cores: Option<u32>,
    /// The minimum tiling dimension for a single axis in tensor cores.
    ///
    /// For a backend that only supports 16x16x16, the value would be 16.
    /// For a backend that also supports 32x8x16, the value would be 8.
    pub min_tensor_cores_dim: Option<u32>,
}