cubecl_runtime/memory_management/mod.rs

pub(crate) mod memory_pool;

mod base;

pub use base::*;

/// Dynamic memory management strategy.
mod memory_manage;
pub use memory_manage::*;

#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

use crate::server::{CubeCount, CubeDim};

/// The type of memory pool to use.
#[derive(Debug, Clone)]
pub enum PoolType {
    /// Use a memory pool where every allocation is a separate page.
    ExclusivePages {
        /// The maximum size of an allocation in this pool, in bytes.
        max_alloc_size: u64,
    },
    /// Use a memory pool where each allocation is a slice of a bigger allocation.
    SlicedPages {
        /// The size of each page to allocate, in bytes.
        page_size: u64,
        /// The maximum size of a slice to allocate in the pool, in bytes.
        max_slice_size: u64,
    },
}
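
// Illustrative sketch: constructing each `PoolType` variant by hand. The byte
// sizes below are arbitrary example values, not defaults taken from this crate.
#[cfg(test)]
mod pool_type_example {
    use super::PoolType;

    #[test]
    fn construct_pool_types() {
        // One dedicated page per allocation, for allocations up to 64 MiB.
        let exclusive = PoolType::ExclusivePages {
            max_alloc_size: 64 * 1024 * 1024,
        };
        // Slices of up to 16 MiB carved out of shared 128 MiB pages.
        let sliced = PoolType::SlicedPages {
            page_size: 128 * 1024 * 1024,
            max_slice_size: 16 * 1024 * 1024,
        };
        assert!(matches!(exclusive, PoolType::ExclusivePages { .. }));
        assert!(matches!(sliced, PoolType::SlicedPages { .. }));
    }
}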

/// Options to create a memory pool.
#[derive(Debug, Clone)]
pub struct MemoryPoolOptions {
    /// What kind of pool to use.
    pub pool_type: PoolType,
    /// Period after which allocations are deemed unused and deallocated.
    ///
    /// This period is measured in the number of allocations in the parent allocator. If a page
    /// in the pool was unused for the entire period, it will be deallocated. The period is
    /// approximate, as checks are only done occasionally.
    pub dealloc_period: Option<u64>,
}
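
// Illustrative sketch: pairing a `PoolType` with a deallocation period. The
// numbers are example values, not defaults taken from this crate.
#[cfg(test)]
mod pool_options_example {
    use super::{MemoryPoolOptions, PoolType};

    #[test]
    fn construct_pool_options() {
        let options = MemoryPoolOptions {
            pool_type: PoolType::SlicedPages {
                page_size: 128 * 1024 * 1024,
                max_slice_size: 16 * 1024 * 1024,
            },
            // A page untouched for ~1000 parent allocations becomes a
            // candidate for deallocation; `None` keeps pages alive forever.
            dealloc_period: Some(1000),
        };
        assert_eq!(options.dealloc_period, Some(1000));
    }
}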

/// High-level configuration of memory management.
#[derive(Clone, Debug)]
pub enum MemoryConfiguration {
    /// The default preset, which uses pools that allocate sub-slices.
    #[cfg(not(exclusive_memory_only))]
    SubSlices,
    /// Default preset for using exclusive pages.
    /// This can be necessary for backends that don't support sub-slices.
    ExclusivePages,
    /// Custom settings.
    Custom {
        /// Options for each pool to construct. When allocating, the first
        /// pool that can fit the allocation is picked.
        pool_options: Vec<MemoryPoolOptions>,
    },
}
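
// Illustrative sketch of a `Custom` configuration. Pools are listed from most
// to least specific, since the first pool that fits is picked: a sliced pool
// serves small allocations, and an exclusive pool catches everything else.
// All sizes are example values, not defaults taken from this crate.
#[cfg(test)]
mod memory_configuration_example {
    use super::{MemoryConfiguration, MemoryPoolOptions, PoolType};

    #[test]
    fn construct_custom_configuration() {
        let config = MemoryConfiguration::Custom {
            pool_options: vec![
                // Tried first: slices up to 1 MiB carved from 64 MiB pages.
                MemoryPoolOptions {
                    pool_type: PoolType::SlicedPages {
                        page_size: 64 * 1024 * 1024,
                        max_slice_size: 1024 * 1024,
                    },
                    dealloc_period: Some(1000),
                },
                // Fallback: a dedicated page for every larger allocation.
                MemoryPoolOptions {
                    pool_type: PoolType::ExclusivePages {
                        max_alloc_size: u64::MAX,
                    },
                    dealloc_period: None,
                },
            ],
        };
        let MemoryConfiguration::Custom { pool_options } = config else {
            panic!("expected a custom configuration");
        };
        assert_eq!(pool_options.len(), 2);
    }
}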

#[allow(clippy::derivable_impls)]
impl Default for MemoryConfiguration {
    fn default() -> Self {
        #[cfg(exclusive_memory_only)]
        {
            MemoryConfiguration::ExclusivePages
        }
        #[cfg(not(exclusive_memory_only))]
        {
            MemoryConfiguration::SubSlices
        }
    }
}

/// Properties of the device related to allocation.
#[derive(Debug, Clone)]
pub struct MemoryDeviceProperties {
    /// The maximum number of bytes that can be allocated in a single allocation.
    pub max_page_size: u64,
    /// The required memory offset alignment in bytes.
    pub alignment: u64,
}
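
// Illustrative sketch: rounding an allocation size up to the device's required
// alignment. `align_up` is a hypothetical helper and the property values are
// example numbers; neither is part of this module's API.
#[cfg(test)]
mod device_properties_example {
    use super::MemoryDeviceProperties;

    /// Round `size` up to the next multiple of `alignment`.
    fn align_up(size: u64, alignment: u64) -> u64 {
        (size + alignment - 1) / alignment * alignment
    }

    #[test]
    fn round_up_to_alignment() {
        let props = MemoryDeviceProperties {
            max_page_size: 256 * 1024 * 1024,
            alignment: 256,
        };
        // 1000 bytes rounds up to 1024, the next multiple of 256.
        assert_eq!(align_up(1000, props.alignment), 1024);
        // Already-aligned sizes are unchanged.
        assert_eq!(align_up(4096, props.alignment), 4096);
    }
}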

/// Properties of the device related to the accelerator hardware.
///
/// # Plane size min/max
///
/// This is the range of possible values for the plane size.
///
/// For Nvidia GPUs and HIP, this is a single fixed value.
///
/// For wgpu with AMD GPUs, this is a range of possible values, but the actual configured value
/// is undefined and can only be queried at runtime. It is usually 32, but this is not guaranteed.
///
/// For Intel GPUs, the plane size varies with the number of registers used in the kernel, and
/// there is currently no way to query it at compile time. As a result, the minimum value should
/// usually be assumed.
#[derive(Debug, Clone)]
pub struct HardwareProperties {
    /// The maximum size of a single load instruction, in bits. Used for optimized line sizes.
    pub load_width: u32,
    /// The minimum size of a plane on this device.
    pub plane_size_min: u32,
    /// The maximum size of a plane on this device.
    pub plane_size_max: u32,
    /// Maximum number of bindings that can be used in a kernel at once.
    pub max_bindings: u32,
    /// Maximum amount of shared memory, in bytes.
    pub max_shared_memory_size: usize,
    /// Maximum `CubeCount` in the x, y, and z dimensions.
    pub max_cube_count: CubeCount,
    /// Maximum total number of units in a cube.
    pub max_units_per_cube: u32,
    /// Maximum `CubeDim` in the x, y, and z dimensions.
    pub max_cube_dim: CubeDim,
    /// Number of streaming multiprocessors (SMs), if available.
    pub num_streaming_multiprocessors: Option<u32>,
    /// Number of available parallel CPU units, if the runtime is CPU-based.
    pub num_cpu_cores: Option<u32>,
    /// Number of tensor cores per SM, if any.
    pub num_tensor_cores: Option<u32>,
    /// The minimum tiling dimension for a single axis in tensor cores.
    ///
    /// For a backend that only supports 16x16x16, the value would be 16.
    /// For a backend that also supports 32x8x16, the value would be 8.
    pub min_tensor_cores_dim: Option<u32>,
}
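
// Illustrative sketch: deriving a maximum line (vector) size from
// `load_width`, as hinted by its doc comment above. The helper and the
// numbers are hypothetical examples, not part of this module's API.
#[cfg(test)]
mod hardware_properties_example {
    /// Widest line size (in elements) for an element type of `elem_bits`
    /// bits, given a device `load_width` in bits.
    fn max_line_size(load_width: u32, elem_bits: u32) -> u32 {
        load_width / elem_bits
    }

    #[test]
    fn line_size_from_load_width() {
        // With 128-bit loads (a common GPU value), a 32-bit element can be
        // loaded 4 at a time and a 16-bit element 8 at a time.
        assert_eq!(max_line_size(128, 32), 4);
        assert_eq!(max_line_size(128, 16), 8);
    }
}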