pub fn reduce_max_impl<T>(
    ndim: i32,
    x_dims: *const i32,
    y_dims: *const i32,
    alpha: T,
    x: *const T,
    y: *mut T,
    context: *mut CPUContext
)