1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
#![cfg(target_feature = "ssse3")]
use super::*;
/// Computes the absolute value of every `i8` lane.
///
/// The operation wraps instead of saturating: a lane holding `i8::MIN` comes
/// back as `i8::MIN`.
///
/// * **Intrinsic:** [`_mm_abs_epi8`]
/// * **Assembly:** `pabsb xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn abs_i8_m128i(a: m128i) -> m128i {
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_abs_epi8(a.0) };
    m128i(raw)
}
/// Computes the absolute value of every `i16` lane.
///
/// The operation wraps instead of saturating: a lane holding `i16::MIN` comes
/// back as `i16::MIN`.
///
/// * **Intrinsic:** [`_mm_abs_epi16`]
/// * **Assembly:** `pabsw xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn abs_i16_m128i(a: m128i) -> m128i {
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_abs_epi16(a.0) };
    m128i(raw)
}
/// Computes the absolute value of every `i32` lane.
///
/// The operation wraps instead of saturating: a lane holding `i32::MIN` comes
/// back as `i32::MIN`.
///
/// * **Intrinsic:** [`_mm_abs_epi32`]
/// * **Assembly:** `pabsd xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn abs_i32_m128i(a: m128i) -> m128i {
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_abs_epi32(a.0) };
    m128i(raw)
}
/// Treats `a` as the high bytes and `b` as the low bytes of one combined
/// value, then performs a **byte** shift to the right by the immediate `IMM`
/// and returns the low 16 bytes of the shifted value.
///
/// Remember that this is all little-endian data. For example, with `IMM = 3`
/// output byte 0 is byte 3 of `b`, and the top three output bytes are bytes
/// 0, 1 and 2 of `a`.
///
/// `IMM` is a const generic because the underlying instruction encodes the
/// shift amount as an immediate.
///
/// * **Intrinsic:** [`_mm_alignr_epi8`]
/// * **Assembly:** `palignr xmm, xmm, imm8`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn combined_byte_shr_imm_m128i<const IMM: i32>(
    a: m128i, b: m128i,
) -> m128i {
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    m128i(unsafe { _mm_alignr_epi8(a.0, b.0, IMM) })
}
/// Sums horizontally adjacent pairs of `i16` lanes.
///
/// The output holds the pair sums from `a` first, followed by the pair sums
/// from `b`.
///
/// * **Intrinsic:** [`_mm_hadd_epi16`]
/// * **Assembly:** `phaddw xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn add_horizontal_i16_m128i(a: m128i, b: m128i) -> m128i {
    let (lhs, rhs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_hadd_epi16(lhs, rhs) };
    m128i(raw)
}
/// Sums horizontally adjacent pairs of `i32` lanes.
///
/// The output holds the pair sums from `a` first, followed by the pair sums
/// from `b`.
///
/// * **Intrinsic:** [`_mm_hadd_epi32`]
/// * **Assembly:** `phaddd xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn add_horizontal_i32_m128i(a: m128i, b: m128i) -> m128i {
    let (lhs, rhs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_hadd_epi32(lhs, rhs) };
    m128i(raw)
}
/// Sums horizontally adjacent pairs of `i16` lanes using saturating addition.
///
/// The output holds the pair sums from `a` first, followed by the pair sums
/// from `b`.
///
/// * **Intrinsic:** [`_mm_hadds_epi16`]
/// * **Assembly:** `phaddsw xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn add_horizontal_saturating_i16_m128i(a: m128i, b: m128i) -> m128i {
    let (lhs, rhs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_hadds_epi16(lhs, rhs) };
    m128i(raw)
}
/// Takes the difference of horizontally adjacent pairs of `i16` lanes.
///
/// The output holds the pair differences from `a` first, followed by the pair
/// differences from `b`.
///
/// * **Intrinsic:** [`_mm_hsub_epi16`]
/// * **Assembly:** `phsubw xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn sub_horizontal_i16_m128i(a: m128i, b: m128i) -> m128i {
    let (lhs, rhs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_hsub_epi16(lhs, rhs) };
    m128i(raw)
}
/// Takes the difference of horizontally adjacent pairs of `i32` lanes.
///
/// The output holds the pair differences from `a` first, followed by the pair
/// differences from `b`.
///
/// * **Intrinsic:** [`_mm_hsub_epi32`]
/// * **Assembly:** `phsubd xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn sub_horizontal_i32_m128i(a: m128i, b: m128i) -> m128i {
    let (lhs, rhs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_hsub_epi32(lhs, rhs) };
    m128i(raw)
}
/// Takes the difference of horizontally adjacent pairs of `i16` lanes using
/// saturating subtraction.
///
/// The output holds the pair differences from `a` first, followed by the pair
/// differences from `b`.
///
/// * **Intrinsic:** [`_mm_hsubs_epi16`]
/// * **Assembly:** `phsubsw xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn sub_horizontal_saturating_i16_m128i(a: m128i, b: m128i) -> m128i {
    let (lhs, rhs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_hsubs_epi16(lhs, rhs) };
    m128i(raw)
}
/// Multiplies `u8` lanes by `i8` lanes, then adds adjacent products with
/// saturation.
///
/// The operation runs in two steps:
///
/// * Each `u8` lane of `a` is multiplied vertically with the matching `i8`
///   lane of `b`, giving an `i16` intermediate value.
/// * Those `i16` intermediates are then added horizontally with saturation.
///
/// * **Intrinsic:** [`_mm_maddubs_epi16`]
/// * **Assembly:** `pmaddubsw xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn mul_u8i8_add_horizontal_saturating_m128i(a: m128i, b: m128i) -> m128i {
    // `a` supplies the unsigned bytes, `b` the signed bytes.
    let (unsigned_bytes, signed_bytes) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_maddubs_epi16(unsigned_bytes, signed_bytes) };
    m128i(raw)
}
/// Multiplies `i16` lanes with rounding and scaling.
///
/// Each pair of `i16` lanes is multiplied into an `i32` intermediate. The high
/// 18 bits of that intermediate are kept, 1 is added to round, and the value
/// is then shifted right by 1.
///
/// The intrinsic is `_mm_mulhrs_epi16`; the `mulhrs` part presumably stands
/// for something like "multiply, high bits, rounded and scaled".
///
/// * **Intrinsic:** [`_mm_mulhrs_epi16`]
/// * **Assembly:** `pmulhrsw xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn mul_i16_scale_round_m128i(a: m128i, b: m128i) -> m128i {
    let (lhs, rhs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_mulhrs_epi16(lhs, rhs) };
    m128i(raw)
}
/// Rearranges the `i8` lanes of `a`, using the `i8` lanes of `v` as the
/// selectors.
///
/// Whenever a lane of `v` is negative, the matching output lane is zeroed.
///
/// * **Intrinsic:** [`_mm_shuffle_epi8`]
/// * **Assembly:** `pshufb xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn shuffle_av_i8z_all_m128i(a: m128i, v: m128i) -> m128i {
    let (data, selectors) = (a.0, v.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_shuffle_epi8(data, selectors) };
    m128i(raw)
}
/// Transfers the sign of each `i8` lane in `b` onto the matching lane of `a`.
///
/// Per lane:
///
/// * `b` negative: the `a` value is negated.
/// * `b` equal to 0: the `a` value becomes 0.
/// * otherwise: the `a` value passes through unchanged.
///
/// * **Intrinsic:** [`_mm_sign_epi8`]
/// * **Assembly:** `psignb xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn sign_apply_i8_m128i(a: m128i, b: m128i) -> m128i {
    let (values, signs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_sign_epi8(values, signs) };
    m128i(raw)
}
/// Transfers the sign of each `i16` lane in `b` onto the matching lane of `a`.
///
/// Per lane:
///
/// * `b` negative: the `a` value is negated.
/// * `b` equal to 0: the `a` value becomes 0.
/// * otherwise: the `a` value passes through unchanged.
///
/// * **Intrinsic:** [`_mm_sign_epi16`]
/// * **Assembly:** `psignw xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn sign_apply_i16_m128i(a: m128i, b: m128i) -> m128i {
    let (values, signs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_sign_epi16(values, signs) };
    m128i(raw)
}
/// Transfers the sign of each `i32` lane in `b` onto the matching lane of `a`.
///
/// Per lane:
///
/// * `b` negative: the `a` value is negated.
/// * `b` equal to 0: the `a` value becomes 0.
/// * otherwise: the `a` value passes through unchanged.
///
/// * **Intrinsic:** [`_mm_sign_epi32`]
/// * **Assembly:** `psignd xmm, xmm`
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "ssse3")))]
pub fn sign_apply_i32_m128i(a: m128i, b: m128i) -> m128i {
    let (values, signs) = (a.0, b.0);
    // SAFETY: this file is gated on `target_feature = "ssse3"`, so the
    // instruction behind the intrinsic is available.
    let raw = unsafe { _mm_sign_epi32(values, signs) };
    m128i(raw)
}