pub struct DeviceProfile { // 32 fields
pub backend: &'static str,
pub supports_subgroup_ops: bool,
pub supports_indirect_dispatch: bool,
pub supports_distributed_collectives: bool,
pub supports_specialization_constants: bool,
pub supports_f16: bool,
pub supports_bf16: bool,
pub supports_trap_propagation: bool,
pub supports_tensor_cores: bool,
pub has_mul_high: bool,
pub has_dual_issue_fp32_int32: bool,
pub has_subgroup_shuffle: bool,
pub has_shared_memory: bool,
pub max_native_int_width: u32,
pub max_workgroup_size: [u32; 3],
pub max_invocations_per_workgroup: u32,
pub max_shared_memory_bytes: u32,
pub max_storage_buffer_binding_size: u64,
pub subgroup_size: u32,
pub compute_units: u32,
pub regs_per_thread_max: u32,
pub l1_cache_bytes: u32,
pub l2_cache_bytes: u32,
pub mem_bw_gbps: u32,
pub timing_quality: DeviceTimingQuality,
pub supports_device_timestamps: bool,
pub supports_hardware_counters: bool,
pub ideal_unroll_depth: u32,
pub ideal_vector_pack_bits: u32,
pub ideal_workgroup_tile: [u32; 3],
pub shared_memory_bank_count: u32,
pub shared_memory_bank_width_bytes: u32,
}
Device capability snapshot used across driver-shared planning.
Fields§
`backend: &'static str` — Stable backend identifier.
`supports_subgroup_ops: bool` — The device and lowering path support subgroup intrinsics.
`supports_indirect_dispatch: bool` — The backend supports indirect dispatch.
`supports_distributed_collectives: bool` — The backend lowers distributed collective communication nodes.
`supports_specialization_constants: bool` — The backend supports compile-time specialization constants.
`supports_f16: bool` — The backend lowers binary16 natively.
`supports_bf16: bool` — The backend lowers bfloat16 natively.
`supports_trap_propagation: bool` — The backend preserves explicit trap propagation.
`supports_tensor_cores: bool` — The backend lowers matrix-engine operations for supported shapes.
`has_mul_high: bool` — Native unsigned multiply-high is available to lowering strategies.
`has_dual_issue_fp32_int32: bool` — Integer and float pipelines can issue concurrently.
`has_subgroup_shuffle: bool` — Subgroup shuffle-like communication is available.
`has_shared_memory: bool` — Explicit workgroup/shared memory is available.
`max_native_int_width: u32` — Maximum native integer width in bits.
`max_workgroup_size: [u32; 3]` — Maximum workgroup dimensions.
`max_invocations_per_workgroup: u32` — Maximum invocations in one workgroup.
`max_shared_memory_bytes: u32` — Shared memory per workgroup in bytes.
`max_storage_buffer_binding_size: u64` — Maximum single storage-buffer binding in bytes.
`subgroup_size: u32` — Native subgroup size, or 0 when unknown.
`compute_units: u32` — Physical compute-unit count, or 0 when unknown.
`regs_per_thread_max: u32` — Maximum registers per thread, or 0 when unknown.
`l1_cache_bytes: u32` — L1 cache size in bytes, or 0 when unknown.
`l2_cache_bytes: u32` — L2 cache size in bytes, or 0 when unknown.
`mem_bw_gbps: u32` — Peak memory bandwidth in GB/s, or 0 when unknown.
`timing_quality: DeviceTimingQuality` — Timing-data quality exposed by this backend/device.
`supports_device_timestamps: bool` — Device timestamp queries/events are available for dispatch timing.
`supports_hardware_counters: bool` — Hardware counter sampling is available for benchmark telemetry.
`ideal_unroll_depth: u32` — Device-profile preferred unroll depth, or 0 when unknown.
`ideal_vector_pack_bits: u32` — Device-profile preferred vector pack width in bits, or 0 when unknown.
`ideal_workgroup_tile: [u32; 3]` — Device-profile preferred workgroup tile, or [0, 0, 0] when unknown.
`shared_memory_bank_count: u32` — Shared-memory bank count, or 0 when unknown.
`shared_memory_bank_width_bytes: u32` — Shared-memory bank width in bytes, or 0 when unknown.
Implementations§
Source§impl DeviceProfile
impl DeviceProfile
Sourcepub const fn conservative(backend: &'static str) -> Self
pub const fn conservative(backend: &'static str) -> Self
Conservative profile for a backend that has not probed a device.
Sourcepub fn from_backend(backend: &dyn VyreBackend) -> Self
pub fn from_backend(backend: &dyn VyreBackend) -> Self
Build a profile from the stable backend trait capability methods.
Sourcepub const fn validation_capabilities(self) -> BackendCapabilities
pub const fn validation_capabilities(self) -> BackendCapabilities
Validation capability projection.
Sourcepub const fn adapter_caps(self) -> AdapterCaps
pub const fn adapter_caps(self) -> AdapterCaps
Optimizer capability projection.
Sourcepub const fn strategy_capabilities(self) -> BackendCapabilities
pub const fn strategy_capabilities(self) -> BackendCapabilities
Strategy capability projection.
Trait Implementations§
Source§impl Clone for DeviceProfile
impl Clone for DeviceProfile
Source§fn clone(&self) -> DeviceProfile
fn clone(&self) -> DeviceProfile
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
impl Copy for DeviceProfile
Source§impl Debug for DeviceProfile
impl Debug for DeviceProfile
Source§impl Default for DeviceProfile
impl Default for DeviceProfile
impl Eq for DeviceProfile
Source§impl From<DeviceProfile> for AdapterCaps
impl From<DeviceProfile> for AdapterCaps
Source§fn from(profile: DeviceProfile) -> Self
fn from(profile: DeviceProfile) -> Self
Source§impl From<DeviceProfile> for BackendCapabilities
impl From<DeviceProfile> for BackendCapabilities
Source§fn from(profile: DeviceProfile) -> Self
fn from(profile: DeviceProfile) -> Self
Source§impl PartialEq for DeviceProfile
impl PartialEq for DeviceProfile
Source§fn eq(&self, other: &DeviceProfile) -> bool
fn eq(&self, other: &DeviceProfile) -> bool
Tests for self and other values to be equal, and is used by ==.
impl StructuralPartialEq for DeviceProfile
Auto Trait Implementations§
impl Freeze for DeviceProfile
impl RefUnwindSafe for DeviceProfile
impl Send for DeviceProfile
impl Sync for DeviceProfile
impl Unpin for DeviceProfile
impl UnsafeUnpin for DeviceProfile
impl UnwindSafe for DeviceProfile
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<Q, K> Equivalent<K> for Q
impl<Q, K> Equivalent<K> for Q
Source§impl<Q, K> Equivalent<K> for Q
impl<Q, K> Equivalent<K> for Q
Source§fn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
Compare self to key and return true if they are equal.