/// Identifier string for the ReLU kernel: `"ghostflow_relu_f32"`.
///
/// The description alongside this constant gives the kernel's element-wise
/// rule as `y[i] = max(0, x[i])`.
///
/// NOTE(review): the `_f32` suffix presumably indicates a 32-bit float
/// variant, and this string is presumably used to look the kernel up by
/// name; neither is visible from this declaration, so confirm against the
/// kernel source and the call sites.
pub const RELU_KERNEL: &str = "ghostflow_relu_f32";
// ReLU kernel: y[i] = max(0, x[i])
// Element-wise rule: y[i] = max(0, x[i])