use vyre_foundation::ir::Program;
use super::atomic_scalar::{atomic_reduce_u32, AtomicReduceKind};
/// Stable string identifier for the bit-count reduction op.
///
/// It is forwarded to `atomic_reduce_u32` when the program is built and is
/// also the first argument of the harness `OpEntry` registered in this file.
pub const OP_ID: &str = "vyre-primitives::reduce::count";
/// Builds the [`Program`] that counts the set bits of a bitset buffer.
///
/// This is a thin wrapper: it delegates to `atomic_reduce_u32`, selecting
/// [`AtomicReduceKind::PopcountSum`] and tagging the program with [`OP_ID`].
///
/// * `bitset` - name of the input buffer (presumably packed `u32` words;
///   confirm against `atomic_reduce_u32`).
/// * `out` - name of the output buffer that receives the reduction result.
/// * `words` - forwarded unchanged; presumably the number of `u32` words in
///   `bitset` (confirm against `atomic_reduce_u32`).
#[must_use]
pub fn reduce_count(bitset: &str, out: &str, words: u32) -> Program {
    let kind = AtomicReduceKind::PopcountSum;
    atomic_reduce_u32(bitset, out, words, kind, OP_ID)
}
/// CPU reference implementation: total number of set bits across all words.
///
/// Returns `0` for an empty slice. Each word contributes its population
/// count (`u32::count_ones`) to the running total.
#[must_use]
pub fn cpu_ref(bitset: &[u32]) -> u32 {
    let mut total = 0u32;
    for word in bitset {
        total += word.count_ones();
    }
    total
}
// Registers this op with the harness when the `inventory-registry` feature is
// enabled. The entry carries the op id, a builder for a 2-word program, and
// two byte-buffer fixtures (presumably inputs and expected outputs; confirm
// against `OpEntry::new`).
#[cfg(feature = "inventory-registry")]
inventory::submit! {
    crate::harness::OpEntry::new(
        OP_ID,
        || reduce_count("bitset", "out", 2),
        Some(|| {
            // Serialises u32 words as consecutive little-endian bytes.
            fn le_bytes(words: &[u32]) -> Vec<u8> {
                words.iter().flat_map(|word| word.to_le_bytes()).collect()
            }
            // Two buffers: the bitset words (4 + 32 set bits) and a zeroed word.
            let bitset = le_bytes(&[0b1111, 0xFFFF_FFFF]);
            let out = le_bytes(&[0]);
            vec![vec![bitset, out]]
        }),
        Some(|| {
            // Serialises u32 words as consecutive little-endian bytes.
            fn le_bytes(words: &[u32]) -> Vec<u8> {
                words.iter().flat_map(|word| word.to_le_bytes()).collect()
            }
            // Single buffer holding 36, the bit count of the fixture above.
            let expected = le_bytes(&[36]);
            vec![vec![expected]]
        }),
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The CPU reference sums the popcount of every word: 4 + 32 = 36.
    #[test]
    fn total_bit_count() {
        let words: [u32; 2] = [0b1111, 0xFFFF_FFFF];
        let counted = cpu_ref(&words);
        assert_eq!(counted, 36);
    }

    /// The generated program must use the shared atomic-reduce workgroup size.
    ///
    /// Only the workgroup dimensions are asserted here; the 513-word length is
    /// presumably chosen to exceed one workgroup (confirm against
    /// `WORKGROUP_SIZE`).
    #[test]
    fn program_uses_parallel_grid_stride() {
        let expected = [crate::reduce::atomic_scalar::WORKGROUP_SIZE, 1, 1];
        let program = reduce_count("bitset", "out", 513);
        assert_eq!(program.workgroup_size(), expected);
    }
}