// When compiling for the `nvptx64` target the crate is built without `std`
// and without a `main` entry point; on every other target both stay enabled.
#![cfg_attr(target_arch = "nvptx64", no_std)]
#![cfg_attr(target_arch = "nvptx64", no_main)]
// Unstable language features switched on only for `nvptx64`. `#![feature]`
// requires a nightly toolchain, so only builds for that target need nightly.
#![cfg_attr(target_arch = "nvptx64", feature(abi_ptx, asm_experimental_arch))]
/// Warp size used by the crate: `64` when the `warp64` Cargo feature is
/// enabled, `32` otherwise.
///
/// The value is selected at compile time; `cfg!` expands to a boolean literal,
/// so this remains a plain constant.
pub const WARP_SIZE: u32 = if cfg!(feature = "warp64") { 64 } else { 32 };
// Submodules of the crate. The notes below only record which items this file
// re-exports from each module (at the crate root or through `prelude`);
// modules without a note (`cub`, `diverge`, `simwarp`, `sort`) have nothing
// re-exported here and are reached by their module path.

// Source of `ActiveSet`, `All`, `Even`, `Odd`, `LowHalf`, `HighHalf`, `Lane0`, ...
pub mod active_set;
// Source of `BlockId`, `SharedRegion`, `ThreadId`.
pub mod block;
pub mod cub;
// Source of `PerLane`, `Uniform`, `SingleLane`, `LaneId`, `WarpId`, `Role`.
pub mod data;
pub mod diverge;
// Source of `DynDiverge`.
pub mod dynamic;
// Source of `Fenced`, `GlobalRegion`, `WriteState` and the write-state types.
pub mod fence;
// Source of `GpuShuffle` (exposed through `prelude` only).
pub mod gpu;
// Source of `DynWarp`.
pub mod gradual;
// Source of the `merge` and `merge_within` functions.
pub mod merge;
// Source of `Platform`, `CpuSimd`, `GpuWarp32`, `GpuWarp64`, `SimdVector`.
pub mod platform;
// Only compiled for test builds or when the `formal-proof` feature is enabled.
#[cfg(any(test, feature = "formal-proof"))]
pub mod proof;
// Source of `Permutation`, `Xor`, `RotateUp`, `RotateDown`, `BallotResult`, ...
pub mod shuffle;
pub mod simwarp;
pub mod sort;
// Source of `Tile`.
pub mod tile;
// Source of `Warp`.
pub mod warp;
// `research` is compiled only for test builds or with the `research` feature,
// and never for the `nvptx64` target (both `cfg` conditions must hold).
#[cfg(any(test, feature = "research"))]
#[cfg(not(target_arch = "nvptx64"))]
// The lints below are silenced for everything inside the module.
// NOTE(review): presumably because it holds exploratory code kept to a looser
// standard than the rest of the crate — confirm and record the reason here.
#[allow(dead_code)]
#[allow(
clippy::new_without_default,
clippy::needless_range_loop,
clippy::module_inception,
clippy::doc_markdown,
clippy::empty_line_after_doc_comments,
clippy::items_after_test_module,
clippy::approx_constant,
rustdoc::invalid_html_tags,
rustdoc::broken_intra_doc_links,
rustdoc::invalid_rust_codeblocks
)]
pub mod research;
/// Runs five successive XOR shuffles (masks 16, 8, 4, 2, 1) over `data` on a
/// full warp and returns the `reduce_sum` of the final shuffle result.
///
/// The function is exported under the fixed symbol
/// `warp_types_zero_overhead_butterfly` and is never inlined, so it stays a
/// standalone function in the compiled output (presumably so its generated
/// code can be inspected — confirm).
#[export_name = "warp_types_zero_overhead_butterfly"]
#[inline(never)]
pub fn zero_overhead_butterfly(data: data::PerLane<i32>) -> i32 {
    let full_warp = Warp::<All>::kernel_entry();

    // Each shuffle consumes the previous result, so the binding is shadowed
    // rather than numbered.
    let lanes = full_warp.shuffle_xor(data, 16);
    let lanes = full_warp.shuffle_xor(lanes, 8);
    let lanes = full_warp.shuffle_xor(lanes, 4);
    let lanes = full_warp.shuffle_xor(lanes, 2);
    let lanes = full_warp.shuffle_xor(lanes, 1);

    let total = full_warp.reduce_sum(lanes);
    total.get()
}
/// Splits a full warp with `diverge_even_odd`, merges the two parts back into
/// a `Warp<All>`, and returns `data` exactly as it was passed in.
///
/// The merged warp is not used afterwards; the warp operations have no
/// influence on the returned value. The function is exported under the fixed
/// symbol `warp_types_zero_overhead_diverge_merge` and is never inlined
/// (presumably so its generated code can be inspected — confirm).
#[export_name = "warp_types_zero_overhead_diverge_merge"]
#[inline(never)]
pub fn zero_overhead_diverge_merge(data: data::PerLane<i32>) -> data::PerLane<i32> {
    let full_warp = Warp::<All>::kernel_entry();
    let (even_lanes, odd_lanes) = full_warp.diverge_even_odd();

    // The annotation requires the merge to yield the full active set again.
    let _rejoined: Warp<All> = merge(even_lanes, odd_lanes);

    data
}
// Sealing support for `GpuValue`. The module must be `pub` because the public
// `GpuValue` trait names `GpuSealed` as a supertrait, but it is hidden from
// the rendered documentation.
#[doc(hidden)]
pub mod gpu_sealed {
// Value returned by `GpuSealed::_gpu_sealed`. It is `pub(crate)`, so code
// outside this crate cannot name it and therefore cannot write an
// implementation of `GpuSealed`.
#[doc(hidden)]
pub(crate) struct GpuSealToken;
// A public trait whose method signature mentions a crate-private type trips
// the `private_interfaces` lint; that exposure is the point of the seal, so
// the lint is silenced here.
#[allow(private_interfaces)]
pub trait GpuSealed {
// Required method with no purpose other than mentioning `GpuSealToken`.
#[doc(hidden)]
fn _gpu_sealed() -> GpuSealToken;
}
}
/// Sealed marker trait bundling the bounds required of a GPU value type:
/// `Copy + Send + Sync + Default + 'static`.
///
/// `gpu_sealed::GpuSealed` is a supertrait, and its required method returns
/// the crate-private `GpuSealToken`, so code outside this crate cannot
/// implement `GpuValue`. Within this file it is implemented for `i32`, `u32`,
/// `f32`, `i64`, `u64`, `f64` and `bool`.
pub trait GpuValue: gpu_sealed::GpuSealed + Copy + Send + Sync + Default + 'static {}
/// Implements `GpuValue`, together with its sealing supertrait
/// `gpu_sealed::GpuSealed`, for every type in a comma-separated list.
macro_rules! impl_gpu_value {
    ($($value_ty:ty),*) => {
        $(
            impl GpuValue for $value_ty {}

            // The method signature mentions the crate-private token type,
            // which would otherwise trigger `private_interfaces`.
            #[allow(private_interfaces)]
            impl gpu_sealed::GpuSealed for $value_ty {
                fn _gpu_sealed() -> gpu_sealed::GpuSealToken {
                    gpu_sealed::GpuSealToken
                }
            }
        )*
    };
}

// Types in this file that receive a `GpuValue` implementation.
impl_gpu_value!(i32, u32, f32, i64, u64, f64, bool);
// Crate-root re-exports: the items below can be named as `crate::Item`
// instead of going through their defining module.
pub use active_set::{
ActiveSet, All, CanDiverge, ComplementOf, ComplementWithin, Empty, Even, EvenHigh, EvenLow,
HighHalf, Lane0, LowHalf, NotLane0, Odd, OddHigh, OddLow,
};
pub use block::{BlockId, SharedRegion, ThreadId};
pub use data::{LaneId, PerLane, Role, SingleLane, Uniform, WarpId};
pub use dynamic::DynDiverge;
pub use fence::{Fenced, FullWrite, GlobalRegion, PartialWrite, Unwritten, WriteState};
pub use gradual::DynWarp;
// `merge` now names both the module and the function of the same name; they
// live in different namespaces, so the two do not clash.
pub use merge::{merge, merge_within};
pub use platform::{CpuSimd, GpuWarp32, GpuWarp64, Platform, SimdVector};
pub use shuffle::{
BallotResult, Compose, HasDual, Identity, Permutation, RotateDown, RotateUp, ShuffleSafe, Xor,
};
pub use tile::Tile;
pub use warp::Warp;
// Comes from the separate `warp_types_kernel` crate rather than a module of
// this one. NOTE(review): presumably an attribute/procedural macro — confirm.
pub use warp_types_kernel::warp_kernel;
pub mod prelude {
pub use crate::data;
pub use crate::gpu::GpuShuffle;
pub use crate::{
merge, merge_within, warp_kernel, ActiveSet, All, BallotResult, CanDiverge, ComplementOf,
ComplementWithin, DynDiverge, DynWarp, Empty, Even, EvenHigh, EvenLow, Fenced, FullWrite,
GlobalRegion, GpuValue, HighHalf, Lane0, LaneId, LowHalf, NotLane0, Odd, OddHigh, OddLow,
PartialWrite, PerLane, SingleLane, Tile, Uniform, Unwritten, Warp, WarpId, WriteState,
};
}