//! Reduction operations for OxiCUDA BLAS.
//!
//! This module provides parallel reduction primitives over device buffers:
//! sum, max, min, mean, variance, and softmax. Each operation generates PTX
//! via templates from `oxicuda-ptx`, performs a two-phase block-level
//! reduction when needed, and writes the scalar (or vector) result to device
//! memory.
pub use max;
pub use mean;
pub use min;
pub use ReductionOp;
pub use softmax;
pub use sum;
pub use variance;