pub fn reduce_tensor_impl<T, Reducer>(
    ndim: i32,
    x_dims: *const i32,
    y_dims: *const i32,
    reducer: &Reducer,
    init: T,
    x: *const T,
    y: *mut T,
    context: *mut CPUContext
)