cubecl_ir/properties.rs
1use core::hash::{BuildHasher, Hash, Hasher};
2
3use crate::{
4 AddressType, SemanticType, StorageType, Type, TypeHash,
5 features::{Features, TypeUsage},
6};
7use cubecl_common::profile::TimingMethod;
8use enumset::EnumSet;
9
/// Properties of the device related to the accelerator hardware.
///
/// # Plane size min/max
///
/// This is a range of possible values for the plane size.
///
/// For Nvidia GPUs and HIP, this is a single fixed value.
///
/// For wgpu with AMD GPUs this is a range of possible values, but the actual configured value
/// is undefined and can only be queried at runtime. Should usually be 32, but not guaranteed.
///
/// For Intel GPUs, this is variable based on the number of registers used in the kernel. No way to
/// query this at compile time is currently available. As a result, the minimum value should usually
/// be assumed.
///
/// # Hashing
///
/// `Hash` is derived, so the fields are hashed in declaration order. Keep the field order
/// unchanged if hashes of this struct need to stay the same.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct HardwareProperties {
    /// The maximum size of a single load instruction, in bits. Used for optimized line sizes.
    pub load_width: u32,
    /// The minimum size of a plane on this device.
    pub plane_size_min: u32,
    /// The maximum size of a plane on this device.
    pub plane_size_max: u32,
    /// Maximum number of bindings for a kernel that can be used at once.
    pub max_bindings: u32,
    /// Maximum amount of shared memory, in bytes.
    pub max_shared_memory_size: usize,
    /// Maximum `CubeCount` in x, y and z dimensions.
    pub max_cube_count: (u32, u32, u32),
    /// Maximum number of total units in a cube.
    pub max_units_per_cube: u32,
    /// Maximum `CubeDim` in x, y, and z dimensions.
    pub max_cube_dim: (u32, u32, u32),
    /// Number of streaming multiprocessors (SM), if available.
    pub num_streaming_multiprocessors: Option<u32>,
    /// Number of available parallel cpu units, if the runtime is CPU.
    pub num_cpu_cores: Option<u32>,
    /// Number of tensor cores per SM, if any.
    pub num_tensor_cores: Option<u32>,
    /// The minimum tiling dimension for a single axis in tensor cores.
    ///
    /// For a backend that only supports 16x16x16, the value would be 16.
    /// For a backend that also supports 32x8x16, the value would be 8.
    pub min_tensor_cores_dim: Option<u32>,
}
54
/// Allocation-related properties of a device.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct MemoryDeviceProperties {
    /// Largest number of bytes that can be allocated in a single allocation.
    pub max_page_size: u64,
    /// Alignment, in bytes, required for memory offsets.
    pub alignment: u64,
}
63
64/// Properties of what the device can do, like what `Feature` are
65/// supported by it and what its memory properties are.
66#[derive(Debug, Clone, PartialEq, Eq)]
67pub struct DeviceProperties {
68 /// The features supported by the runtime.
69 pub features: Features,
70 /// The memory properties of this client.
71 pub memory: MemoryDeviceProperties,
72 /// The topology properties of this client.
73 pub hardware: HardwareProperties,
74 /// The method used for profiling on the device.
75 pub timing_method: TimingMethod,
76}
77
78impl TypeHash for DeviceProperties {
79 fn write_hash(_hasher: &mut impl core::hash::Hasher) {
80 // ignored.
81 }
82}
83
84impl DeviceProperties {
85 /// Create a new feature set with the given features and memory properties.
86 pub fn new(
87 features: Features,
88 memory_props: MemoryDeviceProperties,
89 hardware: HardwareProperties,
90 timing_method: TimingMethod,
91 ) -> Self {
92 DeviceProperties {
93 features,
94 memory: memory_props,
95 hardware,
96 timing_method,
97 }
98 }
99
100 /// Get the usages for a type
101 pub fn type_usage(&self, ty: StorageType) -> EnumSet<TypeUsage> {
102 self.features.type_usage(ty)
103 }
104
105 /// Whether the type is supported in any way
106 pub fn supports_type(&self, ty: impl Into<Type>) -> bool {
107 self.features.supports_type(ty)
108 }
109
110 /// Whether the address type is supported in any way
111 pub fn supports_address(&self, ty: impl Into<AddressType>) -> bool {
112 self.features.supports_address(ty)
113 }
114
115 /// Register an address type to the features
116 pub fn register_address_type(&mut self, ty: impl Into<AddressType>) {
117 self.features.address_types.insert(ty.into());
118 }
119
120 /// Register a storage type to the features
121 pub fn register_type_usage(
122 &mut self,
123 ty: impl Into<StorageType>,
124 uses: impl Into<EnumSet<TypeUsage>>,
125 ) {
126 *self.features.storage_types.entry(ty.into()).or_default() |= uses.into();
127 }
128
129 /// Register a semantic type to the features
130 pub fn register_semantic_type(&mut self, ty: SemanticType) {
131 self.features.semantic_types.insert(ty);
132 }
133
134 /// Create a stable hash of all device properties relevant to kernel compilation. Can be used
135 /// as a stable checksum for a compilation cache.
136 pub fn checksum(&self) -> u64 {
137 let state = foldhash::fast::FixedState::default();
138 let mut hasher = state.build_hasher();
139 self.features.hash(&mut hasher);
140 self.hardware.hash(&mut hasher);
141 hasher.finish()
142 }
143}