#[cfg(target_arch = "spirv")]
use crate::ScalarOrVectorTransform;
#[cfg(target_arch = "spirv")]
use crate::arch::{asm, barrier};
#[cfg(target_arch = "spirv")]
use crate::memory::{Scope, Semantics};
use crate::{Float, Integer, ScalarComposite, ScalarOrVector, SignedInteger, UnsignedInteger};
// Numeric value of `Scope::Subgroup`. It is passed as the scope argument of the barrier helpers
// and as the `{subgroup}` constant of every inline-asm block in this file. Only compiled for the
// `spirv` target, like the `Scope` import it relies on.
#[cfg(target_arch = "spirv")]
const SUBGROUP: u32 = Scope::Subgroup as u32;
/// Bitmask type used by the subgroup ballot operations; an alias for `glam::UVec4`.
///
/// Returned by `subgroup_ballot` and accepted by `subgroup_inverse_ballot` and the
/// `subgroup_ballot_*` functions of this module.
pub type SubgroupMask = glam::UVec4;
/// The class of group operation, i.e. the `Group Operation` operand of the
/// `OpGroupNonUniform*` instructions emitted by this module.
///
/// The discriminants are cast with `as u32` and emitted verbatim into the inline assembly, so
/// they must match the `Group Operation` enumeration of the SPIR-V specification.
#[non_exhaustive]
#[derive(Debug, PartialEq, Eq)]
pub enum GroupOperation {
/// Reduction over the values contributed by the invocations of the group.
Reduce = 0,
/// Inclusive scan: for `[a0, a1, ..]` the results are `[a0, (a0 op a1), ..]`.
InclusiveScan = 1,
/// Exclusive scan: for `[a0, a1, ..]` and identity `I` the results are `[I, a0, (a0 op a1), ..]`.
ExclusiveScan = 2,
/// Reduction performed per cluster; used by the `subgroup_clustered_*` functions together with
/// a `ClusterSize` operand.
ClusteredReduce = 3,
/// NV partitioned variant of `Reduce`; not used by the functions visible in this part of the file.
PartitionedReduceNV = 6,
/// NV partitioned variant of `InclusiveScan`; not used by the functions visible in this part of the file.
PartitionedInclusiveScanNV = 7,
/// NV partitioned variant of `ExclusiveScan`; not used by the functions visible in this part of the file.
PartitionedExclusiveScanNV = 8,
}
/// Subgroup control barrier (GLSL `subgroupBarrier`).
///
/// Calls `barrier::control_barrier` with `SUBGROUP` for both scope parameters and
/// acquire-release semantics covering uniform, workgroup and image memory.
#[spirv_std_macros::gpu_only]
#[doc(alias = "subgroupBarrier")]
#[inline]
pub fn subgroup_barrier() {
    // Memory-semantics mask, named so the const-generic call below stays on one line.
    const SEMANTICS: u32 = Semantics::ACQUIRE_RELEASE.bits()
        | Semantics::UNIFORM_MEMORY.bits()
        | Semantics::WORKGROUP_MEMORY.bits()
        | Semantics::IMAGE_MEMORY.bits();

    barrier::control_barrier::<SUBGROUP, SUBGROUP, SEMANTICS>();
}
/// Subgroup memory barrier over all memory kinds (GLSL `subgroupMemoryBarrier`).
///
/// Calls `barrier::memory_barrier` at `SUBGROUP` scope with acquire-release semantics covering
/// uniform, workgroup and image memory.
#[spirv_std_macros::gpu_only]
#[doc(alias = "subgroupMemoryBarrier")]
#[inline]
pub fn subgroup_memory_barrier() {
    // Memory-semantics mask, named so the const-generic call below stays on one line.
    const SEMANTICS: u32 = Semantics::ACQUIRE_RELEASE.bits()
        | Semantics::UNIFORM_MEMORY.bits()
        | Semantics::WORKGROUP_MEMORY.bits()
        | Semantics::IMAGE_MEMORY.bits();

    barrier::memory_barrier::<SUBGROUP, SEMANTICS>();
}
/// Subgroup memory barrier restricted to buffer memory (GLSL `subgroupMemoryBarrierBuffer`).
///
/// Calls `barrier::memory_barrier` at `SUBGROUP` scope with acquire-release semantics on
/// uniform memory only.
#[spirv_std_macros::gpu_only]
#[doc(alias = "subgroupMemoryBarrierBuffer")]
#[inline]
pub fn subgroup_memory_barrier_buffer() {
    const SEMANTICS: u32 = Semantics::ACQUIRE_RELEASE.bits() | Semantics::UNIFORM_MEMORY.bits();

    barrier::memory_barrier::<SUBGROUP, SEMANTICS>();
}
/// Subgroup memory barrier restricted to shared memory (GLSL `subgroupMemoryBarrierShared`).
///
/// Calls `barrier::memory_barrier` at `SUBGROUP` scope with acquire-release semantics on
/// workgroup memory only.
#[spirv_std_macros::gpu_only]
#[doc(alias = "subgroupMemoryBarrierShared")]
#[inline]
pub fn subgroup_memory_barrier_shared() {
    const SEMANTICS: u32 = Semantics::ACQUIRE_RELEASE.bits() | Semantics::WORKGROUP_MEMORY.bits();

    barrier::memory_barrier::<SUBGROUP, SEMANTICS>();
}
/// Subgroup memory barrier restricted to image memory (GLSL `subgroupMemoryBarrierImage`).
///
/// Calls `barrier::memory_barrier` at `SUBGROUP` scope with acquire-release semantics on
/// image memory only.
#[spirv_std_macros::gpu_only]
#[doc(alias = "subgroupMemoryBarrierImage")]
#[inline]
pub fn subgroup_memory_barrier_image() {
    const SEMANTICS: u32 = Semantics::ACQUIRE_RELEASE.bits() | Semantics::IMAGE_MEMORY.bits();

    barrier::memory_barrier::<SUBGROUP, SEMANTICS>();
}
/// Elects one invocation of the subgroup via `OpGroupNonUniformElect`.
///
/// Returns the boolean produced by the instruction at `Subgroup` scope. Per the SPIR-V
/// specification it is `true` only in the active invocation with the lowest id in the group.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformElect")]
#[inline]
pub fn subgroup_elect() -> bool {
    let mut elected = false;
    // SAFETY: the block has no inputs and only stores a `bool` through a pointer to the live
    // local `elected`.
    unsafe {
        asm!(
            "%bool = OpTypeBool",
            "%u32 = OpTypeInt 32 0",
            "%subgroup = OpConstant %u32 {subgroup}",
            "%result = OpGroupNonUniformElect %bool %subgroup",
            "OpStore {result} %result",
            subgroup = const SUBGROUP,
            result = in(reg) &mut elected,
        );
    }
    elected
}
/// Evaluates `predicate` across the subgroup via `OpGroupNonUniformAll`.
///
/// Per the SPIR-V specification the result is `true` if `predicate` is `true` for all active
/// invocations in the group, otherwise `false`.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformAll")]
#[inline]
pub fn subgroup_all(predicate: bool) -> bool {
    let mut all_true = false;
    // SAFETY: the block loads from a pointer to the live parameter `predicate` and stores a
    // `bool` through a pointer to the live local `all_true`.
    unsafe {
        asm!(
            "%bool = OpTypeBool",
            "%u32 = OpTypeInt 32 0",
            "%subgroup = OpConstant %u32 {subgroup}",
            "%predicate = OpLoad _ {predicate}",
            "%result = OpGroupNonUniformAll %bool %subgroup %predicate",
            "OpStore {result} %result",
            subgroup = const SUBGROUP,
            predicate = in(reg) &predicate,
            result = in(reg) &mut all_true,
        );
    }
    all_true
}
/// Evaluates `predicate` across the subgroup via `OpGroupNonUniformAny`.
///
/// Per the SPIR-V specification the result is `true` if `predicate` is `true` for any active
/// invocation in the group, otherwise `false`.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformAny")]
#[inline]
pub fn subgroup_any(predicate: bool) -> bool {
    let mut any_true = false;
    // SAFETY: the block loads from a pointer to the live parameter `predicate` and stores a
    // `bool` through a pointer to the live local `any_true`.
    unsafe {
        asm!(
            "%bool = OpTypeBool",
            "%u32 = OpTypeInt 32 0",
            "%subgroup = OpConstant %u32 {subgroup}",
            "%predicate = OpLoad _ {predicate}",
            "%result = OpGroupNonUniformAny %bool %subgroup %predicate",
            "OpStore {result} %result",
            subgroup = const SUBGROUP,
            predicate = in(reg) &predicate,
            result = in(reg) &mut any_true,
        );
    }
    any_true
}
/// Tests whether `value` is equal across the subgroup via `OpGroupNonUniformAllEqual`.
///
/// `value` is run through `ScalarComposite::transform`; every scalar or vector handed to the
/// transform is compared with one `OpGroupNonUniformAllEqual`, and the individual results are
/// AND-ed together. The accumulator starts at `true`.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformAllEqual")]
#[inline]
pub fn subgroup_all_equal<T: ScalarComposite>(value: T) -> bool {
    /// Running conjunction of the per-part comparison results.
    struct AllEqual {
        equal_so_far: bool,
    }

    impl ScalarOrVectorTransform for AllEqual {
        #[inline]
        fn transform<V: ScalarOrVector>(&mut self, value: V) -> V {
            let mut part_equal = false;
            // SAFETY: the block loads from a pointer to the live parameter `value` and stores a
            // `bool` through a pointer to the live local `part_equal`.
            unsafe {
                asm!(
                    "%bool = OpTypeBool",
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%result = OpGroupNonUniformAllEqual %bool %subgroup %value",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    value = in(reg) &value,
                    result = in(reg) &mut part_equal,
                );
            }
            self.equal_so_far &= part_equal;
            // The value itself is passed through unchanged; only the flag matters.
            value
        }
    }

    let mut visitor = AllEqual { equal_so_far: true };
    value.transform(&mut visitor);
    visitor.equal_so_far
}
/// Broadcasts `value` from the invocation identified by `id` via `OpGroupNonUniformBroadcast`.
///
/// `value` is run through `ScalarComposite::transform`; each scalar or vector handed to the
/// transform is replaced by the result of one `OpGroupNonUniformBroadcast`.
///
/// # Safety
/// The caller must uphold the SPIR-V requirements on the `Id` operand of
/// `OpGroupNonUniformBroadcast`: behavior is undefined when `id` is not dynamically uniform, and
/// before SPIR-V 1.5 `Id` must come from a constant instruction (see
/// `subgroup_broadcast_const`). The result is undefined if `id` names an inactive invocation or
/// is not smaller than the size of the group.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformBroadcast")]
#[inline]
pub unsafe fn subgroup_broadcast<T: ScalarComposite>(value: T, id: u32) -> T {
    /// Carries the source invocation id into the per-part transform.
    struct Broadcast {
        source_id: u32,
    }

    impl ScalarOrVectorTransform for Broadcast {
        #[inline]
        fn transform<V: ScalarOrVector>(&mut self, value: V) -> V {
            let mut received = V::default();
            // SAFETY: the block loads from pointers to the live `value` and `self.source_id` and
            // stores through a pointer to the live local `received`, whose type is `V`.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%id = OpLoad _ {id}",
                    "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    value = in(reg) &value,
                    id = in(reg) &self.source_id,
                    result = in(reg) &mut received,
                );
            }
            received
        }
    }

    let mut visitor = Broadcast { source_id: id };
    value.transform(&mut visitor)
}
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformBroadcast")]
#[inline]
pub unsafe fn subgroup_broadcast_const<T: ScalarOrVector, const ID: u32>(value: T) -> T {
struct Transform<const ID: u32>;
impl<const ID: u32> ScalarOrVectorTransform for Transform<ID> {
#[inline]
fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
let mut result = T::default();
unsafe {
asm! {
"%u32 = OpTypeInt 32 0",
"%subgroup = OpConstant %u32 {subgroup}",
"%id = OpConstant %u32 {id}",
"%value = OpLoad _ {value}",
"%result = OpGroupNonUniformBroadcast _ %subgroup %value %id",
"OpStore {result} %result",
subgroup = const SUBGROUP,
value = in(reg) &value,
id = const ID,
result = in(reg) &mut result,
}
}
result
}
}
value.transform(&mut Transform::<ID>)
}
/// Broadcasts `value` via `OpGroupNonUniformBroadcastFirst`.
///
/// Per the SPIR-V specification the result is the value of the active invocation with the
/// lowest id in the group. `value` is run through `ScalarComposite::transform`; each scalar or
/// vector handed to the transform is replaced by the result of one instruction.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformBroadcastFirst")]
#[inline]
pub fn subgroup_broadcast_first<T: ScalarComposite>(value: T) -> T {
    /// Stateless per-part transform.
    struct BroadcastFirst;

    impl ScalarOrVectorTransform for BroadcastFirst {
        #[inline]
        fn transform<V: ScalarOrVector>(&mut self, value: V) -> V {
            let mut received = V::default();
            // SAFETY: the block loads from a pointer to the live parameter `value` and stores
            // through a pointer to the live local `received`, whose type is `V`.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%result = OpGroupNonUniformBroadcastFirst _ %subgroup %value",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    value = in(reg) &value,
                    result = in(reg) &mut received,
                );
            }
            received
        }
    }

    let mut visitor = BroadcastFirst;
    value.transform(&mut visitor)
}
/// Collects `predicate` from the subgroup into a bitmask via `OpGroupNonUniformBallot`.
///
/// Per the SPIR-V specification, a bit of the returned [`SubgroupMask`] is set to one if the
/// corresponding invocation is active and its `predicate` evaluated to `true`.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformBallot")]
#[inline]
pub fn subgroup_ballot(predicate: bool) -> SubgroupMask {
    let mut ballot = SubgroupMask::default();
    // SAFETY: the block loads from a pointer to the live parameter `predicate` and stores a
    // value of the pointee type (`typeof*{result}`) through a pointer to the live local `ballot`.
    unsafe {
        asm!(
            "%u32 = OpTypeInt 32 0",
            "%subgroup = OpConstant %u32 {subgroup}",
            "%predicate = OpLoad _ {predicate}",
            "%result = OpGroupNonUniformBallot typeof*{result} %subgroup %predicate",
            "OpStore {result} %result",
            subgroup = const SUBGROUP,
            predicate = in(reg) &predicate,
            result = in(reg) &mut ballot,
        );
    }
    ballot
}
/// Reads the current invocation's bit out of a ballot via `OpGroupNonUniformInverseBallot`.
///
/// Per the SPIR-V specification the result is `true` if the bit in `value` that corresponds to
/// the current invocation is set to one.
///
/// # Safety
/// Per the SPIR-V specification, behavior is undefined unless `value` is the same for all
/// invocations that execute the same dynamic instance of this instruction.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformInverseBallot")]
#[inline]
pub unsafe fn subgroup_inverse_ballot(value: SubgroupMask) -> bool {
    let mut bit_set = false;
    // SAFETY: the block loads from a pointer to the live parameter `value` and stores a `bool`
    // through a pointer to the live local `bit_set`; the uniformity requirement on `value` is
    // the caller's obligation.
    unsafe {
        asm!(
            "%bool = OpTypeBool",
            "%u32 = OpTypeInt 32 0",
            "%subgroup = OpConstant %u32 {subgroup}",
            "%value = OpLoad _ {value}",
            "%result = OpGroupNonUniformInverseBallot %bool %subgroup %value",
            "OpStore {result} %result",
            subgroup = const SUBGROUP,
            value = in(reg) &value,
            result = in(reg) &mut bit_set,
        );
    }
    bit_set
}
/// Tests one bit of a ballot via `OpGroupNonUniformBallotBitExtract`.
///
/// Per the SPIR-V specification the result is `true` if the bit of `value` that corresponds to
/// `index` is set to one; the result is undefined if `index` is greater than or equal to the
/// size of the group.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformBallotBitExtract")]
#[inline]
pub fn subgroup_ballot_bit_extract(value: SubgroupMask, index: u32) -> bool {
    let mut bit_set = false;
    // SAFETY: the block loads from pointers to the live parameters `value` and `index` and
    // stores a `bool` through a pointer to the live local `bit_set`.
    unsafe {
        asm!(
            "%bool = OpTypeBool",
            "%u32 = OpTypeInt 32 0",
            "%subgroup = OpConstant %u32 {subgroup}",
            "%value = OpLoad _ {value}",
            "%index = OpLoad _ {index}",
            "%result = OpGroupNonUniformBallotBitExtract %bool %subgroup %value %index",
            "OpStore {result} %result",
            subgroup = const SUBGROUP,
            value = in(reg) &value,
            index = in(reg) &index,
            result = in(reg) &mut bit_set,
        );
    }
    bit_set
}
// Generates one `pub fn $name(value: SubgroupMask) -> u32` that emits
// `OpGroupNonUniformBallotBitCount` with `$group_op` (cast to `u32`) as its group operation.
macro_rules! macro_subgroup_ballot_bit_count {
    ($name:ident, $group_op:expr) => {
        #[spirv_std_macros::gpu_only]
        #[doc(alias = "OpGroupNonUniformBallotBitCount")]
        #[inline]
        pub fn $name(value: SubgroupMask) -> u32 {
            let mut bit_count = 0;
            // SAFETY: the block loads from a pointer to the live parameter `value` and stores a
            // `u32` through a pointer to the live local `bit_count`.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup {groupop} %value",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const ($group_op as u32),
                    value = in(reg) &value,
                    result = in(reg) &mut bit_count,
                );
            }
            bit_count
        }
    };
}
// The three `OpGroupNonUniformBallotBitCount` wrappers: plain count (`Reduce`), inclusive scan
// and exclusive scan.
macro_subgroup_ballot_bit_count!(subgroup_ballot_bit_count, GroupOperation::Reduce);
macro_subgroup_ballot_bit_count!(
subgroup_ballot_inclusive_bit_count,
GroupOperation::InclusiveScan
);
macro_subgroup_ballot_bit_count!(
subgroup_ballot_exclusive_bit_count,
GroupOperation::ExclusiveScan
);
/// Finds the least significant set bit of a ballot via `OpGroupNonUniformBallotFindLSB`.
///
/// Per the SPIR-V specification only the bits of `value` needed to represent the group's
/// invocations are considered; if none of them is set, the result is undefined.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformBallotFindLSB")]
#[inline]
pub fn subgroup_ballot_find_lsb(value: SubgroupMask) -> u32 {
    let mut lowest_bit = 0;
    // SAFETY: the block loads from a pointer to the live parameter `value` and stores a `u32`
    // through a pointer to the live local `lowest_bit`.
    unsafe {
        asm!(
            "%u32 = OpTypeInt 32 0",
            "%subgroup = OpConstant %u32 {subgroup}",
            "%value = OpLoad _ {value}",
            "%result = OpGroupNonUniformBallotFindLSB %u32 %subgroup %value",
            "OpStore {result} %result",
            subgroup = const SUBGROUP,
            value = in(reg) &value,
            result = in(reg) &mut lowest_bit,
        );
    }
    lowest_bit
}
/// Finds the most significant set bit of a ballot via `OpGroupNonUniformBallotFindMSB`.
///
/// Per the SPIR-V specification only the bits of `value` needed to represent the group's
/// invocations are considered; if none of them is set, the result is undefined.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformBallotFindMSB")]
#[inline]
pub fn subgroup_ballot_find_msb(value: SubgroupMask) -> u32 {
    let mut highest_bit = 0;
    // SAFETY: the block loads from a pointer to the live parameter `value` and stores a `u32`
    // through a pointer to the live local `highest_bit`.
    unsafe {
        asm!(
            "%u32 = OpTypeInt 32 0",
            "%subgroup = OpConstant %u32 {subgroup}",
            "%value = OpLoad _ {value}",
            "%result = OpGroupNonUniformBallotFindMSB %u32 %subgroup %value",
            "OpStore {result} %result",
            subgroup = const SUBGROUP,
            value = in(reg) &value,
            result = in(reg) &mut highest_bit,
        );
    }
    highest_bit
}
/// Fetches `value` from the invocation identified by `id` via `OpGroupNonUniformShuffle`.
///
/// Per the SPIR-V specification the result is undefined if `id` names an inactive invocation or
/// is greater than or equal to the size of the group. `value` is run through
/// `ScalarComposite::transform`; each scalar or vector handed to the transform is replaced by
/// the result of one shuffle.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformShuffle")]
#[inline]
pub fn subgroup_shuffle<T: ScalarComposite>(value: T, id: u32) -> T {
    /// Carries the source invocation id into the per-part transform.
    struct Shuffle {
        source_id: u32,
    }

    impl ScalarOrVectorTransform for Shuffle {
        #[inline]
        fn transform<V: ScalarOrVector>(&mut self, value: V) -> V {
            let mut received = V::default();
            // SAFETY: the block loads from pointers to the live `value` and `self.source_id` and
            // stores through a pointer to the live local `received`, whose type is `V`.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%id = OpLoad _ {id}",
                    "%result = OpGroupNonUniformShuffle _ %subgroup %value %id",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    value = in(reg) &value,
                    id = in(reg) &self.source_id,
                    result = in(reg) &mut received,
                );
            }
            received
        }
    }

    let mut visitor = Shuffle { source_id: id };
    value.transform(&mut visitor)
}
/// Fetches `value` from a partner invocation via `OpGroupNonUniformShuffleXor`.
///
/// Per the SPIR-V specification the partner is the invocation whose id is the current
/// invocation's id within the group xor'ed with `mask`. `value` is run through
/// `ScalarComposite::transform`; each scalar or vector handed to the transform is replaced by
/// the result of one shuffle.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformShuffleXor")]
#[inline]
pub fn subgroup_shuffle_xor<T: ScalarComposite>(value: T, mask: u32) -> T {
    /// Carries the xor mask into the per-part transform.
    struct ShuffleXor {
        xor_mask: u32,
    }

    impl ScalarOrVectorTransform for ShuffleXor {
        #[inline]
        fn transform<V: ScalarOrVector>(&mut self, value: V) -> V {
            let mut received = V::default();
            // SAFETY: the block loads from pointers to the live `value` and `self.xor_mask` and
            // stores through a pointer to the live local `received`, whose type is `V`.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%mask = OpLoad _ {mask}",
                    "%result = OpGroupNonUniformShuffleXor _ %subgroup %value %mask",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    value = in(reg) &value,
                    mask = in(reg) &self.xor_mask,
                    result = in(reg) &mut received,
                );
            }
            received
        }
    }

    let mut visitor = ShuffleXor { xor_mask: mask };
    value.transform(&mut visitor)
}
/// Fetches `value` from a lower-numbered invocation via `OpGroupNonUniformShuffleUp`.
///
/// Per the SPIR-V specification the source is the invocation whose id is the current
/// invocation's id within the group minus `delta`. `value` is run through
/// `ScalarComposite::transform`; each scalar or vector handed to the transform is replaced by
/// the result of one shuffle.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformShuffleUp")]
#[inline]
pub fn subgroup_shuffle_up<T: ScalarComposite>(value: T, delta: u32) -> T {
    /// Carries the id offset into the per-part transform.
    struct ShuffleUp {
        offset: u32,
    }

    impl ScalarOrVectorTransform for ShuffleUp {
        #[inline]
        fn transform<V: ScalarOrVector>(&mut self, value: V) -> V {
            let mut received = V::default();
            // SAFETY: the block loads from pointers to the live `value` and `self.offset` and
            // stores through a pointer to the live local `received`, whose type is `V`.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%delta = OpLoad _ {delta}",
                    "%result = OpGroupNonUniformShuffleUp _ %subgroup %value %delta",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    value = in(reg) &value,
                    delta = in(reg) &self.offset,
                    result = in(reg) &mut received,
                );
            }
            received
        }
    }

    let mut visitor = ShuffleUp { offset: delta };
    value.transform(&mut visitor)
}
/// Fetches `value` from a higher-numbered invocation via `OpGroupNonUniformShuffleDown`.
///
/// Per the SPIR-V specification the source is the invocation whose id is the current
/// invocation's id within the group plus `delta`. `value` is run through
/// `ScalarComposite::transform`; each scalar or vector handed to the transform is replaced by
/// the result of one shuffle.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformShuffleDown")]
#[inline]
pub fn subgroup_shuffle_down<T: ScalarComposite>(value: T, delta: u32) -> T {
    /// Carries the id offset into the per-part transform.
    struct ShuffleDown {
        offset: u32,
    }

    impl ScalarOrVectorTransform for ShuffleDown {
        #[inline]
        fn transform<V: ScalarOrVector>(&mut self, value: V) -> V {
            let mut received = V::default();
            // SAFETY: the block loads from pointers to the live `value` and `self.offset` and
            // stores through a pointer to the live local `received`, whose type is `V`.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%delta = OpLoad _ {delta}",
                    "%result = OpGroupNonUniformShuffleDown _ %subgroup %value %delta",
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    value = in(reg) &value,
                    delta = in(reg) &self.offset,
                    result = in(reg) &mut received,
                );
            }
            received
        }
    }

    let mut visitor = ShuffleDown { offset: delta };
    value.transform(&mut visitor)
}
// Generates, for every `$name, $group_op` pair, a safe `pub fn $name<I>(value: I) -> I` over any
// `ScalarOrVector` whose `Scalar` is `$scalar`. Each function emits the SPIR-V instruction
// `$asm_op` at `Subgroup` scope with `$group_op` (cast to `u32`) as its group operation.
// `$docs` becomes the rustdoc of every generated function.
macro_rules! macro_subgroup_op {
    ($scalar:ty, $asm_op:literal, $($name:ident, $group_op:expr),+; $docs:literal) => { $(
        #[doc = $docs]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $asm_op)]
        #[inline]
        pub fn $name<I: ScalarOrVector<Scalar = $scalar>>(
            value: I,
        ) -> I {
            let mut combined = I::default();
            // SAFETY: the block loads from a pointer to the live parameter `value` and stores
            // through a pointer to the live local `combined`, whose type is `I`.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    concat!("%result = ", $asm_op, " _ %subgroup {groupop} %value"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const ($group_op as u32),
                    value = in(reg) &value,
                    result = in(reg) &mut combined,
                );
            }
            combined
        }
    )+ };
}
// Generates one `pub unsafe fn $name<const CLUSTER_SIZE: u32, I>(value: I) -> I` over any
// `ScalarOrVector` whose `Scalar` is `$scalar`. The function emits the SPIR-V instruction
// `$asm_op` at `Subgroup` scope with the `ClusteredReduce` group operation and `CLUSTER_SIZE` as
// a constant `ClusterSize` operand. `$docs` becomes the rustdoc of the generated function.
macro_rules! macro_subgroup_op_clustered {
    ($scalar:ty, $asm_op:literal, $name:ident; $docs:literal) => {
        #[doc = $docs]
        #[spirv_std_macros::gpu_only]
        #[doc(alias = $asm_op)]
        #[inline]
        pub unsafe fn $name<const CLUSTER_SIZE: u32, I: ScalarOrVector<Scalar = $scalar>>(
            value: I,
        ) -> I {
            // Compile-time checks on the cluster size. That it does not exceed the subgroup size
            // cannot be asserted here, which is why the function is `unsafe`.
            const {
                assert!(CLUSTER_SIZE >= 1, "`ClusterSize` must be at least 1");
                assert!(
                    CLUSTER_SIZE.is_power_of_two(),
                    "`ClusterSize` must be a power of 2"
                );
            }

            let mut combined = I::default();
            // SAFETY: the block loads from a pointer to the live parameter `value` and stores
            // through a pointer to the live local `combined`, whose type is `I`; the bound on
            // `CLUSTER_SIZE` relative to the subgroup size is the caller's obligation.
            unsafe {
                asm!(
                    "%u32 = OpTypeInt 32 0",
                    "%subgroup = OpConstant %u32 {subgroup}",
                    "%value = OpLoad _ {value}",
                    "%clustersize = OpConstant %u32 {clustersize}",
                    concat!("%result = ", $asm_op, " _ %subgroup {groupop} %value %clustersize"),
                    "OpStore {result} %result",
                    subgroup = const SUBGROUP,
                    groupop = const (GroupOperation::ClusteredReduce as u32),
                    clustersize = const CLUSTER_SIZE,
                    value = in(reg) &value,
                    result = in(reg) &mut combined,
                );
            }
            combined
        }
    };
}
// Generates `subgroup_i_add`, `subgroup_inclusive_i_add` and `subgroup_exclusive_i_add`:
// `OpGroupNonUniformIAdd` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `Integer` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl Integer, "OpGroupNonUniformIAdd", subgroup_i_add, GroupOperation::Reduce, subgroup_inclusive_i_add, GroupOperation::InclusiveScan, subgroup_exclusive_i_add, GroupOperation::ExclusiveScan; r"
An integer add group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_i_add`: `OpGroupNonUniformIAdd` with the
// `ClusteredReduce` group operation for `Integer` scalars and vectors; the cluster size is a
// const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIAdd", subgroup_clustered_i_add; r"
An integer add group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Generates `subgroup_f_add`, `subgroup_inclusive_f_add` and `subgroup_exclusive_f_add`:
// `OpGroupNonUniformFAdd` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `Float` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl Float, "OpGroupNonUniformFAdd", subgroup_f_add, GroupOperation::Reduce, subgroup_inclusive_f_add, GroupOperation::InclusiveScan, subgroup_exclusive_f_add, GroupOperation::ExclusiveScan; r"
A floating point add group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of floating-point type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0.
The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_f_add`: `OpGroupNonUniformFAdd` with the
// `ClusteredReduce` group operation for `Float` scalars and vectors; the cluster size is a
// const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFAdd", subgroup_clustered_f_add; r"
A floating point add group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of floating-point type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Generates `subgroup_i_mul`, `subgroup_inclusive_i_mul` and `subgroup_exclusive_i_mul`:
// `OpGroupNonUniformIMul` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `Integer` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl Integer, "OpGroupNonUniformIMul", subgroup_i_mul, GroupOperation::Reduce, subgroup_inclusive_i_mul, GroupOperation::InclusiveScan, subgroup_exclusive_i_mul, GroupOperation::ExclusiveScan; r"
An integer multiply group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 1.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_i_mul`: `OpGroupNonUniformIMul` with the
// `ClusteredReduce` group operation for `Integer` scalars and vectors; the cluster size is a
// const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformIMul", subgroup_clustered_i_mul; r"
An integer multiply group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Generates `subgroup_f_mul`, `subgroup_inclusive_f_mul` and `subgroup_exclusive_f_mul`:
// `OpGroupNonUniformFMul` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `Float` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl Float, "OpGroupNonUniformFMul", subgroup_f_mul, GroupOperation::Reduce, subgroup_inclusive_f_mul, GroupOperation::InclusiveScan, subgroup_exclusive_f_mul, GroupOperation::ExclusiveScan; r"
A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of floating-point type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 1.
The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_f_mul`: `OpGroupNonUniformFMul` with the
// `ClusteredReduce` group operation for `Float` scalars and vectors; the cluster size is a
// const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMul", subgroup_clustered_f_mul; r"
A floating point multiply group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of floating-point type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 1. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Generates `subgroup_s_min`, `subgroup_inclusive_s_min` and `subgroup_exclusive_s_min`:
// `OpGroupNonUniformSMin` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `SignedInteger` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_s_min, GroupOperation::Reduce, subgroup_inclusive_s_min, GroupOperation::InclusiveScan, subgroup_exclusive_s_min, GroupOperation::ExclusiveScan; r"
A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is `INT_MAX`.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_s_min`: `OpGroupNonUniformSMin` with the
// `ClusteredReduce` group operation for `SignedInteger` scalars and vectors; the cluster size is
// a const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMin", subgroup_clustered_s_min; r"
A signed integer minimum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is `INT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Generates `subgroup_u_min`, `subgroup_inclusive_u_min` and `subgroup_exclusive_u_min`:
// `OpGroupNonUniformUMin` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `UnsignedInteger` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_u_min, GroupOperation::Reduce, subgroup_inclusive_u_min, GroupOperation::InclusiveScan, subgroup_exclusive_u_min, GroupOperation::ExclusiveScan; r"
An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is `UINT_MAX`.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_u_min`: `OpGroupNonUniformUMin` with the
// `ClusteredReduce` group operation for `UnsignedInteger` scalars and vectors; the cluster size
// is a const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMin", subgroup_clustered_u_min; r"
An unsigned integer minimum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is `UINT_MAX`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Generates `subgroup_f_min`, `subgroup_inclusive_f_min` and `subgroup_exclusive_f_min`:
// `OpGroupNonUniformFMin` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `Float` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl Float, "OpGroupNonUniformFMin", subgroup_f_min, GroupOperation::Reduce, subgroup_inclusive_f_min, GroupOperation::InclusiveScan, subgroup_exclusive_f_min, GroupOperation::ExclusiveScan; r"
A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of floating-point type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is +INF.
The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_f_min`: `OpGroupNonUniformFMin` with the
// `ClusteredReduce` group operation for `Float` scalars and vectors; the cluster size is a
// const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMin", subgroup_clustered_f_min; r"
A floating point minimum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of floating-point type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is +INF. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Generates `subgroup_s_max`, `subgroup_inclusive_s_max` and `subgroup_exclusive_s_max`:
// `OpGroupNonUniformSMax` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `SignedInteger` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_s_max, GroupOperation::Reduce, subgroup_inclusive_s_max, GroupOperation::InclusiveScan, subgroup_exclusive_s_max, GroupOperation::ExclusiveScan; r"
A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is `INT_MIN`.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_s_max`: `OpGroupNonUniformSMax` with the
// `ClusteredReduce` group operation for `SignedInteger` scalars and vectors; the cluster size is
// a const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl SignedInteger, "OpGroupNonUniformSMax", subgroup_clustered_s_max; r"
A signed integer maximum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is `INT_MIN`. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Generates `subgroup_u_max`, `subgroup_inclusive_u_max` and `subgroup_exclusive_u_max`:
// `OpGroupNonUniformUMax` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `UnsignedInteger` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_u_max, GroupOperation::Reduce, subgroup_inclusive_u_max, GroupOperation::InclusiveScan, subgroup_exclusive_u_max, GroupOperation::ExclusiveScan; r"
An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_u_max`: `OpGroupNonUniformUMax` with the
// `ClusteredReduce` group operation for `UnsignedInteger` scalars and vectors; the cluster size
// is a const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl UnsignedInteger, "OpGroupNonUniformUMax", subgroup_clustered_u_max; r"
An unsigned integer maximum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type, whose Signedness operand is 0.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Generates `subgroup_f_max`, `subgroup_inclusive_f_max` and `subgroup_exclusive_f_max`:
// `OpGroupNonUniformFMax` with the `Reduce`, `InclusiveScan` and `ExclusiveScan` group operations
// for `Float` scalars and vectors. The raw string below is their shared rustdoc.
macro_subgroup_op!(impl Float, "OpGroupNonUniformFMax", subgroup_f_max, GroupOperation::Reduce, subgroup_inclusive_f_max, GroupOperation::InclusiveScan, subgroup_exclusive_f_max, GroupOperation::ExclusiveScan; r"
A floating point maximum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of floating-point type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is -INF.
The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
Requires Capability `GroupNonUniformArithmetic`.
");
// Generates the `unsafe fn subgroup_clustered_f_max`: `OpGroupNonUniformFMax` with the
// `ClusteredReduce` group operation for `Float` scalars and vectors; the cluster size is a
// const generic parameter. The raw string below is its rustdoc.
macro_subgroup_op_clustered!(impl Float, "OpGroupNonUniformFMax", subgroup_clustered_f_max; r"
A floating point maximum group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of floating-point type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is -INF. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Bitwise AND for `impl Integer` values: hands `macro_subgroup_op!` the opcode name, three function names
// paired with `GroupOperation::{Reduce, InclusiveScan, ExclusiveScan}`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably it emits one function per pair; confirm.
macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_and, GroupOperation::Reduce, subgroup_inclusive_and, GroupOperation::InclusiveScan, subgroup_exclusive_and, GroupOperation::ExclusiveScan; r"
A bitwise and group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is ~0.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Clustered bitwise AND for `impl Integer` values: hands `macro_subgroup_op_clustered!` the opcode name,
// the single function name `subgroup_clustered_and`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably the generated function takes the `ClusterSize` the docs describe; confirm.
macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseAnd", subgroup_clustered_and; r"
A bitwise and group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Bitwise OR for `impl Integer` values: hands `macro_subgroup_op!` the opcode name, three function names
// paired with `GroupOperation::{Reduce, InclusiveScan, ExclusiveScan}`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably it emits one function per pair; confirm.
macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_or, GroupOperation::Reduce, subgroup_inclusive_or, GroupOperation::InclusiveScan, subgroup_exclusive_or, GroupOperation::ExclusiveScan; r"
A bitwise or group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Clustered bitwise OR for `impl Integer` values: hands `macro_subgroup_op_clustered!` the opcode name,
// the single function name `subgroup_clustered_or`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably the generated function takes the `ClusterSize` the docs describe; confirm.
macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseOr", subgroup_clustered_or; r"
A bitwise or group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Bitwise XOR for `impl Integer` values: hands `macro_subgroup_op!` the opcode name, three function names
// paired with `GroupOperation::{Reduce, InclusiveScan, ExclusiveScan}`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably it emits one function per pair; confirm.
macro_subgroup_op!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_xor, GroupOperation::Reduce, subgroup_inclusive_xor, GroupOperation::InclusiveScan, subgroup_exclusive_xor, GroupOperation::ExclusiveScan; r"
A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Clustered bitwise XOR for `impl Integer` values: hands `macro_subgroup_op_clustered!` the opcode name,
// the single function name `subgroup_clustered_xor`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably the generated function takes the `ClusterSize` the docs describe; confirm.
macro_subgroup_op_clustered!(impl Integer, "OpGroupNonUniformBitwiseXor", subgroup_clustered_xor; r"
A bitwise xor group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of integer type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Logical AND for `bool` values: hands `macro_subgroup_op!` the opcode name, three function names
// paired with `GroupOperation::{Reduce, InclusiveScan, ExclusiveScan}`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably it emits one function per pair; confirm.
macro_subgroup_op!(bool, "OpGroupNonUniformLogicalAnd", subgroup_logical_and, GroupOperation::Reduce, subgroup_inclusive_logical_and, GroupOperation::InclusiveScan, subgroup_exclusive_logical_and, GroupOperation::ExclusiveScan; r"
A logical and group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of Boolean type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is ~0.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Clustered logical AND for `bool` values: hands `macro_subgroup_op_clustered!` the opcode name,
// the single function name `subgroup_clustered_logical_and`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably the generated function takes the `ClusterSize` the docs describe; confirm.
macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalAnd", subgroup_clustered_logical_and; r"
A logical and group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of Boolean type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is ~0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Logical OR for `bool` values: hands `macro_subgroup_op!` the opcode name, three function names
// paired with `GroupOperation::{Reduce, InclusiveScan, ExclusiveScan}`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably it emits one function per pair; confirm.
macro_subgroup_op!(bool, "OpGroupNonUniformLogicalOr", subgroup_logical_or, GroupOperation::Reduce, subgroup_inclusive_logical_or, GroupOperation::InclusiveScan, subgroup_exclusive_logical_or, GroupOperation::ExclusiveScan; r"
A logical or group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of Boolean type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Clustered logical OR for `bool` values: hands `macro_subgroup_op_clustered!` the opcode name,
// the single function name `subgroup_clustered_logical_or`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably the generated function takes the `ClusterSize` the docs describe; confirm.
macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalOr", subgroup_clustered_logical_or; r"
A logical or group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of Boolean type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
// Logical XOR for `bool` values: hands `macro_subgroup_op!` the opcode name, three function names
// paired with `GroupOperation::{Reduce, InclusiveScan, ExclusiveScan}`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably it emits one function per pair; confirm.
macro_subgroup_op!(bool, "OpGroupNonUniformLogicalXor", subgroup_logical_xor, GroupOperation::Reduce, subgroup_inclusive_logical_xor, GroupOperation::InclusiveScan, subgroup_exclusive_logical_xor, GroupOperation::ExclusiveScan; r"
A logical xor group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of Boolean type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0.
The type of `value` must be the same as Result Type.
Requires Capability `GroupNonUniformArithmetic`.
");
// Clustered logical XOR for `bool` values: hands `macro_subgroup_op_clustered!` the opcode name,
// the single function name `subgroup_clustered_logical_xor`, and the doc text in the raw string.
// NOTE(review): the macro body is outside this chunk — presumably the generated function takes the `ClusterSize` the docs describe; confirm.
macro_subgroup_op_clustered!(bool, "OpGroupNonUniformLogicalXor", subgroup_clustered_logical_xor; r"
A logical xor group operation of all `value` operands contributed by active invocations in the group.
Result Type must be a scalar or vector of Boolean type.
Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup.
The identity I for Operation is 0. If Operation is `ClusteredReduce`, `ClusterSize` must be present.
The type of `value` must be the same as Result Type.
`ClusterSize` is the size of cluster to use. `ClusterSize` must be a scalar of integer type, whose Signedness operand is 0. `ClusterSize` must come from a constant instruction. Behavior is undefined unless `ClusterSize` is at least 1 and a power of 2. If `ClusterSize` is greater than the size of the group, executing this instruction results in undefined behavior.
Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`.
# Safety
* `ClusterSize` must not be greater than the size of the group
");
/// Returns `value` as read through `OpGroupNonUniformQuadBroadcast` with the quad index `index`.
///
/// `value` is passed to `ScalarComposite::transform`; every `ScalarOrVector` piece handed back to the
/// transform is sent through its own `OpGroupNonUniformQuadBroadcast` with `Subgroup` as the scope operand.
/// NOTE(review): presumably the transform is invoked once per scalar/vector member of `T` — confirm against
/// the `ScalarComposite` implementation.
///
/// `index` is not checked here. NOTE(review): per the SPIR-V specification the result is undefined when the
/// index is 4 or greater or refers to an inactive invocation — confirm and document for callers.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformQuadBroadcast")]
#[inline]
pub fn subgroup_quad_broadcast<T: ScalarComposite>(value: T, index: u32) -> T {
// Carries the quad index into the per-piece transform below.
struct Transform {
index: u32,
}
impl ScalarOrVectorTransform for Transform {
#[inline]
fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
// Placeholder value; the asm block overwrites it through `OpStore`.
let mut result = T::default();
// SAFETY: the asm only loads through the references to `value` and `self.index` and stores through the
// reference to `result`; all three referents are live for the whole block.
unsafe {
asm! {
"%u32 = OpTypeInt 32 0",
"%subgroup = OpConstant %u32 {subgroup}",
"%value = OpLoad _ {value}",
"%index = OpLoad _ {index}",
"%result = OpGroupNonUniformQuadBroadcast _ %subgroup %value %index",
"OpStore {result} %result",
subgroup = const SUBGROUP,
value = in(reg) &value,
index = in(reg) &self.index,
result = in(reg) &mut result,
}
}
result
}
}
value.transform(&mut Transform { index })
}
/// Kind of swap performed by `subgroup_quad_swap`.
///
/// The discriminants are the values of the `Direction` operand of `OpGroupNonUniformQuadSwap`, so a
/// variant can be supplied as the `DIRECTION` const generic via a cast, e.g.
/// `{ QuadDirection::Horizontal as u32 }`.
///
/// The pairings listed on each variant are those given by the SPIR-V specification.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuadDirection {
    /// Horizontal swap (`Direction` 0): quad index 0 swaps with 1, and 2 swaps with 3.
    Horizontal = 0,
    /// Vertical swap (`Direction` 1): quad index 0 swaps with 2, and 1 swaps with 3.
    Vertical = 1,
    /// Diagonal swap (`Direction` 2): quad index 0 swaps with 3, and 1 swaps with 2.
    Diagonal = 2,
}
/// Returns `value` as read through `OpGroupNonUniformQuadSwap` with the swap direction `DIRECTION`.
///
/// `DIRECTION` is emitted as a 32-bit unsigned `OpConstant`; the `QuadDirection` enum in this file names
/// the values 0, 1 and 2 (use e.g. `{ QuadDirection::Horizontal as u32 }`). Other values are not rejected here.
///
/// `value` is passed to `ScalarComposite::transform`; every `ScalarOrVector` piece handed back to the
/// transform is sent through its own `OpGroupNonUniformQuadSwap` with `Subgroup` as the scope operand.
/// NOTE(review): presumably the transform is invoked once per scalar/vector member of `T` — confirm against
/// the `ScalarComposite` implementation.
#[spirv_std_macros::gpu_only]
#[doc(alias = "OpGroupNonUniformQuadSwap")]
#[inline]
pub fn subgroup_quad_swap<const DIRECTION: u32, T: ScalarComposite>(value: T) -> T {
// Zero-sized carrier that threads the `DIRECTION` const generic into the per-piece transform below.
struct Transform<const DIRECTION: u32>;
impl<const DIRECTION: u32> ScalarOrVectorTransform for Transform<DIRECTION> {
#[inline]
fn transform<T: ScalarOrVector>(&mut self, value: T) -> T {
// Placeholder value; the asm block overwrites it through `OpStore`.
let mut result = T::default();
// SAFETY: the asm only loads through the reference to `value` and stores through the reference to
// `result`; both referents are live for the whole block.
unsafe {
asm! {
"%u32 = OpTypeInt 32 0",
"%subgroup = OpConstant %u32 {subgroup}",
"%direction = OpConstant %u32 {direction}",
"%value = OpLoad _ {value}",
"%result = OpGroupNonUniformQuadSwap _ %subgroup %value %direction",
"OpStore {result} %result",
subgroup = const SUBGROUP,
direction = const DIRECTION,
value = in(reg) &value,
result = in(reg) &mut result,
}
}
result
}
}
value.transform(&mut Transform::<DIRECTION>)
}