//! Device, hardware and memory property definitions (`cubecl_ir/properties.rs`).
1use core::hash::{BuildHasher, Hash, Hasher};
2
3use crate::{
4    AddressType, LineSize, SemanticType, StorageType, Type, TypeHash,
5    features::{Features, TypeUsage},
6};
7use cubecl_common::profile::TimingMethod;
8use enumset::EnumSet;
9
/// Properties of the device related to the accelerator hardware.
///
/// # Plane size min/max
///
/// This is a range of possible values for the plane size.
///
/// For Nvidia GPUs and HIP, this is a single fixed value.
///
/// For wgpu with AMD GPUs this is a range of possible values, but the actual configured value
/// is undefined and can only be queried at runtime. Should usually be 32, but not guaranteed.
///
/// For Intel GPUs, this is variable based on the number of registers used in the kernel. No way to
/// query this at compile time is currently available. As a result, the minimum value should usually
/// be assumed.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct HardwareProperties {
    /// The maximum size of a single load instruction, in bits. Used for optimized line sizes.
    pub load_width: u32,
    /// The minimum size of a plane on this device.
    pub plane_size_min: u32,
    /// The maximum size of a plane on this device.
    pub plane_size_max: u32,
    /// Maximum number of bindings for a kernel that can be used at once.
    pub max_bindings: u32,
    /// Maximum amount of shared memory, in bytes.
    pub max_shared_memory_size: usize,
    /// Maximum `CubeCount` in x, y and z dimensions.
    pub max_cube_count: (u32, u32, u32),
    /// Maximum number of total units in a cube.
    pub max_units_per_cube: u32,
    /// Maximum `CubeDim` in x, y, and z dimensions.
    pub max_cube_dim: (u32, u32, u32),
    /// Number of streaming multiprocessors (SM), if available.
    pub num_streaming_multiprocessors: Option<u32>,
    /// Number of available parallel cpu units, if the runtime is CPU.
    pub num_cpu_cores: Option<u32>,
    /// Number of tensor cores per SM, if any.
    pub num_tensor_cores: Option<u32>,
    /// The minimum tiling dimension for a single axis in tensor cores.
    ///
    /// For a backend that only supports 16x16x16, the value would be 16.
    /// For a backend that also supports 32x8x16, the value would be 8.
    pub min_tensor_cores_dim: Option<u32>,
    /// Maximum line size supported by the device.
    pub max_line_size: LineSize,
}
56
/// Properties of the device related to allocation.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct MemoryDeviceProperties {
    /// The maximum number of bytes that can be allocated in one go.
    pub max_page_size: u64,
    /// The required memory offset alignment in bytes.
    pub alignment: u64,
}
65
/// Properties of what the device can do, like what `Feature` are
/// supported by it and what its memory properties are.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DeviceProperties {
    /// The features supported by the runtime.
    pub features: Features,
    /// The memory properties of this client.
    pub memory: MemoryDeviceProperties,
    /// The hardware/topology properties of this client.
    pub hardware: HardwareProperties,
    /// The method used for profiling on the device.
    pub timing_method: TimingMethod,
}
79
80impl TypeHash for DeviceProperties {
81    fn write_hash(_hasher: &mut impl core::hash::Hasher) {
82        // ignored.
83    }
84}
85
86impl DeviceProperties {
87    /// Create a new feature set with the given features and memory properties.
88    pub fn new(
89        features: Features,
90        memory_props: MemoryDeviceProperties,
91        hardware: HardwareProperties,
92        timing_method: TimingMethod,
93    ) -> Self {
94        DeviceProperties {
95            features,
96            memory: memory_props,
97            hardware,
98            timing_method,
99        }
100    }
101
102    /// Get the usages for a type
103    pub fn type_usage(&self, ty: StorageType) -> EnumSet<TypeUsage> {
104        self.features.type_usage(ty)
105    }
106
107    /// Whether the type is supported in any way
108    pub fn supports_type(&self, ty: impl Into<Type>) -> bool {
109        self.features.supports_type(ty)
110    }
111
112    /// Whether the address type is supported in any way
113    pub fn supports_address(&self, ty: impl Into<AddressType>) -> bool {
114        self.features.supports_address(ty)
115    }
116
117    /// Register an address type to the features
118    pub fn register_address_type(&mut self, ty: impl Into<AddressType>) {
119        self.features.address_types.insert(ty.into());
120    }
121
122    /// Register a storage type to the features
123    pub fn register_type_usage(
124        &mut self,
125        ty: impl Into<StorageType>,
126        uses: impl Into<EnumSet<TypeUsage>>,
127    ) {
128        *self.features.storage_types.entry(ty.into()).or_default() |= uses.into();
129    }
130
131    /// Register a semantic type to the features
132    pub fn register_semantic_type(&mut self, ty: SemanticType) {
133        self.features.semantic_types.insert(ty);
134    }
135
136    /// Create a stable hash of all device properties relevant to kernel compilation. Can be used
137    /// as a stable checksum for a compilation cache.
138    pub fn checksum(&self) -> u64 {
139        let state = foldhash::fast::FixedState::default();
140        let mut hasher = state.build_hasher();
141        self.features.hash(&mut hasher);
142        self.hardware.hash(&mut hasher);
143        hasher.finish()
144    }
145}