// faster 0.5.2
//
// Explicit SIMD for humans

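// NOTE(review): every `impl_cast!` invocation in this section is commented
// out, so nothing here is expanded by the compiler. Confirm whether these
// casts are provided elsewhere or whether this disabled list can be removed.
// impl_cast!(Asu8s, i8x16, u8x16, as_u8s, as_u8x16);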
// impl_cast!(Asi8s, u8x16, i8x16, as_i8s, as_i8x16);

// impl_cast!(Asu8s, i8x32, u8x32, as_u8s, as_u8x32);
// impl_cast!(Asi8s, u8x32, i8x32, as_i8s, as_i8x32);

// impl_cast!(Asu8s, i8x64, u8x64, as_u8s, as_u8x64);
// impl_cast!(Asi8s, u8x64, i8x64, as_i8s, as_i8x64);

// impl_cast!(Asu16s, i16x8, u16x8, as_u16s, as_u16x8);
// impl_cast!(Asi16s, u16x8, i16x8, as_i16s, as_i16x8);

// impl_cast!(Asu16s, i16x16, u16x16, as_u16s, as_u16x16);
// impl_cast!(Asi16s, u16x16, i16x16, as_i16s, as_i16x16);

// impl_cast!(Asu16s, i16x32, u16x32, as_u16s, as_u16x32);
// impl_cast!(Asi16s, u16x32, i16x32, as_i16s, as_i16x32);

// impl_cast!(Asu32s, i32x4, u32x4, as_u32s, as_u32x4);
// impl_cast!(Asu32s, f32x4, u32x4, as_u32s, as_u32x4);
// impl_cast!(Asi32s, f32x4, i32x4, as_i32s, as_i32x4);
// impl_cast!(Asi32s, u32x4, i32x4, as_i32s, as_i32x4);
// impl_cast!(Asf32s, u32x4, f32x4, as_f32s, as_f32x4);
// impl_cast!(Asf32s, i32x4, f32x4, as_f32s, as_f32x4);

// impl_cast!(Asu32s, i32x8, u32x8, as_u32s, as_u32x8);
// impl_cast!(Asu32s, f32x8, u32x8, as_u32s, as_u32x8);
// impl_cast!(Asi32s, f32x8, i32x8, as_i32s, as_i32x8);
// impl_cast!(Asi32s, u32x8, i32x8, as_i32s, as_i32x8);
// impl_cast!(Asf32s, u32x8, f32x8, as_f32s, as_f32x8);
// impl_cast!(Asf32s, i32x8, f32x8, as_f32s, as_f32x8);

// impl_cast!(Asu32s, i32x16, u32x16, as_u32s, as_u32x16);
// impl_cast!(Asu32s, f32x16, u32x16, as_u32s, as_u32x16);
// impl_cast!(Asi32s, f32x16, i32x16, as_i32s, as_i32x16);
// impl_cast!(Asi32s, u32x16, i32x16, as_i32s, as_i32x16);
// impl_cast!(Asf32s, u32x16, f32x16, as_f32s, as_f32x16);
// impl_cast!(Asf32s, i32x16, f32x16, as_f32s, as_f32x16);

// impl_cast!(Asu64s, i64x2, u64x2, as_u64s, as_u64x2);
// impl_cast!(Asu64s, f64x2, u64x2, as_u64s, as_u64x2);
// impl_cast!(Asi64s, f64x2, i64x2, as_i64s, as_i64x2);
// impl_cast!(Asi64s, u64x2, i64x2, as_i64s, as_i64x2);
// impl_cast!(Asf64s, u64x2, f64x2, as_f64s, as_f64x2);
// impl_cast!(Asf64s, i64x2, f64x2, as_f64s, as_f64x2);

// impl_cast!(Asu64s, i64x4, u64x4, as_u64s, as_u64x4);
// impl_cast!(Asu64s, f64x4, u64x4, as_u64s, as_u64x4);
// impl_cast!(Asi64s, f64x4, i64x4, as_i64s, as_i64x4);
// impl_cast!(Asi64s, u64x4, i64x4, as_i64s, as_i64x4);
// impl_cast!(Asf64s, u64x4, f64x4, as_f64s, as_f64x4);
// impl_cast!(Asf64s, i64x4, f64x4, as_f64s, as_f64x4);

// impl_cast!(Asu64s, i64x8, u64x8, as_u64s, as_u64x8);
// impl_cast!(Asu64s, f64x8, u64x8, as_u64s, as_u64x8);
// impl_cast!(Asi64s, f64x8, i64x8, as_i64s, as_i64x8);
// impl_cast!(Asi64s, u64x8, i64x8, as_i64s, as_i64x8);
// impl_cast!(Asf64s, u64x8, f64x8, as_f64s, as_f64x8);
// impl_cast!(Asf64s, i64x8, f64x8, as_f64s, as_f64x8);