pub fn reduce_kernel<In: Numeric, Out: Numeric, Acc: Numeric, RA: ReduceArgs>(
input: &RA::Input<In>,
output: &mut RA::Output<Out>,
axis_reduce: usize,
blueprint: ReduceBlueprint,
config: ReduceOperationConfig,
_input_dtype: StorageType,
_output_dtype: StorageType,
_acc_dtype: StorageType,
)