/// Name string for the multiply kernel (`c[i] = a[i] * b[i]`).
///
/// NOTE(review): the `_f32` suffix suggests the kernel operates on 32-bit
/// floats, and this string is presumably used to look the kernel up by name.
/// Neither is visible in this part of the file; confirm against the kernel
/// source and the call sites.
pub const MUL_KERNEL: &str = "ghostflow_mul_f32";
// Multiply kernel: element-wise product of two inputs.
//     c[i] = a[i] * b[i]