[][src]Struct lokacore::arch::x86_64::m128

#[repr(transparent)]
pub struct m128(pub __m128);

A 128-bit SIMD value. Always used as f32x4.

  • This documentation numbers the lanes based on the index you'd need to use to access that lane if the value were cast to an array.
  • This is also the way that the type is printed out using Debug, Display, LowerExp, and UpperExp.
  • This is not necessarily the ordering you'll see if you look at an xmm register in a debugger! Basically because of how little-endian works.
  • Most operations work per-lane, "lanewise".
  • Some operations work using lane 0 only. When appropriate, these have the same name as the lanewise version but with a 0 on the end (example: cmp_eq and cmp_eq0). With the 0 version the other lanes are simply copied forward from self.
  • Comparisons give "bool-ish" output, where all bits 1 in a lane is true, and all bits 0 in a lane is false. Unfortunately, all bits 1 with an f32 is one of the NaN values, and NaN != NaN, so it can be a little tricky to work with until you're used to it.

Methods

impl m128[src]

pub fn add0(self, rhs: Self) -> Self[src]

Adds the 0th lanes without affecting the other lanes of `self`.

pub fn andnot(self, rhs: Self) -> Self[src]

Bitwise (!self) & rhs

pub fn cmp_eq(self, rhs: Self) -> Self[src]

Lanewise self == rhs check, bool-ish output.

pub fn cmp_eq0(self, rhs: Self) -> Self[src]

Lane 0: self == rhs, bool-ish output.

pub fn cmp_ge(self, rhs: Self) -> Self[src]

Lanewise self >= rhs check, bool-ish output.

pub fn cmp_ge0(self, rhs: Self) -> Self[src]

Lane 0: self >= rhs, bool-ish output.

pub fn cmp_gt(self, rhs: Self) -> Self[src]

Lanewise self > rhs check, bool-ish output.

pub fn cmp_gt0(self, rhs: Self) -> Self[src]

Lane 0: self > rhs, bool-ish output.

pub fn cmp_le(self, rhs: Self) -> Self[src]

Lanewise self <= rhs check, bool-ish output.

pub fn cmp_le0(self, rhs: Self) -> Self[src]

Lane 0: self <= rhs, bool-ish output.

pub fn cmp_lt(self, rhs: Self) -> Self[src]

Lanewise self < rhs check, bool-ish output.

pub fn cmp_lt0(self, rhs: Self) -> Self[src]

Lane 0: self < rhs, bool-ish output.

pub fn cmp_ne(self, rhs: Self) -> Self[src]

Lanewise self != rhs check, bool-ish output.

pub fn cmp_ne0(self, rhs: Self) -> Self[src]

Lane 0: self != rhs, bool-ish output.

pub fn cmp_nge(self, rhs: Self) -> Self[src]

Lanewise !(self >= rhs) check, bool-ish output.

Also, this triggers 3rd Impact.

pub fn cmp_nge0(self, rhs: Self) -> Self[src]

Lane 0: !(self >= rhs), bool-ish output.

pub fn cmp_ngt(self, rhs: Self) -> Self[src]

Lanewise !(self > rhs) check, bool-ish output.

pub fn cmp_ngt0(self, rhs: Self) -> Self[src]

Lane 0: !(self > rhs), bool-ish output.

pub fn cmp_nle(self, rhs: Self) -> Self[src]

Lanewise !(self <= rhs) check, bool-ish output.

pub fn cmp_nle0(self, rhs: Self) -> Self[src]

Lane 0: !(self <= rhs), bool-ish output.

pub fn cmp_nlt(self, rhs: Self) -> Self[src]

Lanewise !(self < rhs) check, bool-ish output.

pub fn cmp_nlt0(self, rhs: Self) -> Self[src]

Lane 0: !(self < rhs), bool-ish output.

pub fn cmp_ordinary(self, rhs: Self) -> Self[src]

Lanewise self.not_nan() & rhs.not_nan() check, bool-ish output.

pub fn cmp_ordinary0(self, rhs: Self) -> Self[src]

Lane 0: self.not_nan() & rhs.not_nan(), bool-ish output.

pub fn cmp_nan(self, rhs: Self) -> Self[src]

Lanewise self.is_nan() | rhs.is_nan() check, bool-ish output.

pub fn cmp_nan0(self, rhs: Self) -> Self[src]

Lane 0: self.is_nan() | rhs.is_nan(), bool-ish output.

pub fn cmpi_eq0(self, rhs: Self) -> i32[src]

Lane 0: self == rhs, 0 or 1 i32 output.

pub fn cmpi_ge0(self, rhs: Self) -> i32[src]

Lane 0: self >= rhs, 0 or 1 i32 output.

pub fn cmpi_gt0(self, rhs: Self) -> i32[src]

Lane 0: self > rhs, 0 or 1 i32 output.

pub fn cmpi_le0(self, rhs: Self) -> i32[src]

Lane 0: self <= rhs, 0 or 1 i32 output.

pub fn cmpi_lt0(self, rhs: Self) -> i32[src]

Lane 0: self < rhs, 0 or 1 i32 output.

pub fn cmpi_ne0(self, rhs: Self) -> i32[src]

Lane 0: self != rhs, 0 or 1 i32 output.

pub fn round_replace0_i32(self, rhs: i32) -> Self[src]

Round the i32 to f32 and replace lane 0.

Subject to the current thread's rounding mode

pub fn round_extract0_i32(self) -> i32[src]

Round lane 0 to i32 and return.

Subject to the current thread's rounding mode

pub fn round_replace0_i64(self, rhs: i64) -> Self[src]

Round the i64 to f32 and replace lane 0.

Subject to the current thread's rounding mode

Not available on 32-bit x86 (requires x86_64).

pub fn extract0_f32(self) -> f32[src]

Directly extracts lane 0 as f32.

pub fn round_extract0_i64(self) -> i64[src]

Round lane 0 to i64 and return.

Subject to the current thread's rounding mode

pub fn truncate_extract0_i32(self) -> i32[src]

Truncate lane 0 to i32 and return.

pub fn truncate_extract0_i64(self) -> i64[src]

Truncate lane 0 to i64 and return.

pub fn div0(self, rhs: Self) -> Self[src]

Divides the 0th lanes without affecting the other lanes of `self`.

pub fn load(addr: &Align16<[f32; 4]>) -> Self[src]

Loads a 16-byte aligned f32 array address into an m128.

This produces the same lane order as you'd get if you de-referenced the pointed to array and then used transmute.

pub fn load_splat(addr: &f32) -> Self[src]

Loads the f32 address into all lanes.

pub fn load0(addr: &f32) -> Self[src]

Loads the f32 address into lane 0, other lanes are 0.0.

pub fn load_reverse(addr: &Align16<[f32; 4]>) -> Self[src]

Loads 16-byte aligned f32s into an m128.

This produces the reverse lane order as you'd get if you used a transmute on the pointed to array.

pub fn load_unaligned(addr: &[f32; 4]) -> Self[src]

Loads four f32s into an m128, without any alignment requirement.

This doesn't have the alignment requirements of load, but the lane ordering is the same.

pub fn max(self, rhs: Self) -> Self[src]

Lanewise maximum.

pub fn max0(self, rhs: Self) -> Self[src]

Lane 0 maximum, other lanes are self.

pub fn min(self, rhs: Self) -> Self[src]

Lanewise minimum.

pub fn min0(self, rhs: Self) -> Self[src]

Lane 0 minimum, other lanes are self.

pub fn copy0(self, rhs: Self) -> Self[src]

Copies lane 0 from rhs, other lanes are self.

pub fn copy_high_low(self, rhs: Self) -> Self[src]

Copy the high two lanes of rhs over top of the low two lanes of self, other lanes unchanged.

out[0] = rhs[2]
out[1] = rhs[3]
out[2] = self[2]
out[3] = self[3]

pub fn copy_low_high(self, rhs: Self) -> Self[src]

Copy the low two lanes of rhs over top of the high two lanes of self, other lanes unchanged.

out[0] = self[0]
out[1] = self[1]
out[2] = rhs[0]
out[3] = rhs[1]

pub fn move_mask(self) -> i32[src]

Assumes that this is a bool-ish mask and packs it into an i32.

Specifically, the output i32 has bits 0/1/2/3 set to be the same as the most significant bit in lanes 0/1/2/3 of self.

(Yes, this name is kinda stupid but I couldn't come up with a better thing to rename it to, oh well.)

pub fn reciprocal(self) -> Self[src]

Lanewise approximate reciprocal.

The maximum relative error for this approximation is less than 1.5*2^-12.

pub fn reciprocal0(self) -> Self[src]

Lane 0 approximate reciprocal, other lanes are self.

The maximum relative error for this approximation is less than 1.5*2^-12.

pub fn reciprocal_sqrt(self) -> Self[src]

Lanewise approximate reciprocal of the square root.

The maximum relative error for this approximation is less than 1.5*2^-12.

pub fn reciprocal_sqrt0(self) -> Self[src]

Lane 0 approximate reciprocal of the square root, other lanes are self.

The maximum relative error for this approximation is less than 1.5*2^-12.

pub fn set(a: f32, b: f32, c: f32, d: f32) -> Self[src]

Set four f32 values into an m128.

Because of how little-endian works, this produces the opposite lane order as you'd get compared to putting the arguments in to an array and then using load on that array. Same with using transmute or similar.

pub fn splat(a: f32) -> Self[src]

Set the f32 into all lanes.

pub fn set0(a: f32) -> Self[src]

Set the value into lane 0, other lanes 0.0.

pub fn set_reverse(a: f32, b: f32, c: f32, d: f32) -> Self[src]

Set four f32 values into an m128, order reversed from normal set.

pub fn sqrt(self) -> Self[src]

Lanewise square root.

pub fn sqrt0(self) -> Self[src]

Lane 0 square root, other lanes are self.

pub fn store(self, addr: &mut Align16<[f32; 4]>)[src]

Stores an m128 into a 16-byte aligned f32 array address.

This uses the same lane order as load.

pub fn store0_all(self, addr: &mut Align16<[f32; 4]>)[src]

Stores lane 0 to all indexes of the array.

pub fn store0(self, addr: &mut f32)[src]

Stores lane 0 to the address given.

pub fn store_reverse(self, addr: &mut Align16<[f32; 4]>)[src]

Stores an m128 into a 16-byte aligned f32 array address.

This uses the same lane order as load_reverse.

pub fn store_unaligned(self, addr: &mut [f32; 4])[src]

Stores an m128 into a f32 array address.

This doesn't have the alignment requirements of store, but the lane ordering is the same.

pub fn sub0(self, rhs: Self) -> Self[src]

Subtracts the 0th lanes without affecting the other lanes of `self`.

pub fn unpack_high(self, rhs: Self) -> Self[src]

Unpack and interleave the high lanes of self and rhs.

out[0] = self[2]
out[1] = rhs[2]
out[2] = self[3]
out[3] = rhs[3]

pub fn unpack_low(self, rhs: Self) -> Self[src]

Unpack and interleave the low lanes of self and rhs.

out[0] = self[0]
out[1] = rhs[0]
out[2] = self[1]
out[3] = rhs[1]

impl m128[src]

pub fn abs(self) -> Self[src]

[non-intrinsic] Lanewise absolute value.

This is not an official Intel intrinsic, instead it's a bitand operation with a mask so that the sign bit is cleared in all lanes.

impl m128[src]

pub fn round_i32x4(self) -> m128i[src]

This rounds each lane to i32.

pub fn truncate_i32x4(self) -> m128i[src]

This truncates each lane to i32.

pub fn round_f64x2(self) -> m128d[src]

This "rounds" the lower two lanes to f64.

f64 has more precision than f32 so there's no actual rounding going on here, but I'll just call it rounding so that the naming stays consistent with other similar methods.

pub fn f64_round_copy0(self, rhs: m128d) -> Self[src]

Lane 0 is the low f64 of rhs rounded to f32, other lanes are self.

pub fn cast_m128i(self) -> m128i[src]

Cast the bits of this m128 directly to m128i without modification.

impl m128[src]

pub fn add_sub(self, rhs: Self) -> Self[src]

Adds odd lanes (3 and 1) and subtracts even lanes (2 and 0).

out[0]= self[0] - rhs[0]
out[1]= self[1] + rhs[1]
out[2]= self[2] - rhs[2]
out[3]= self[3] + rhs[3]

pub fn horizontal_add(self, rhs: Self) -> Self[src]

Horizontal add both self and rhs, then pack together.

out[0]= self[0] + self[1]
out[1]= self[2] + self[3]
out[2]= rhs[0] + rhs[1]
out[3]= rhs[2] + rhs[3]

pub fn horizontal_sub(self, rhs: Self) -> Self[src]

Horizontal subtract both self and rhs, then pack together.

out[0]= self[0] - self[1]
out[1]= self[2] - self[3]
out[2]= rhs[0] - rhs[1]
out[3]= rhs[2] - rhs[3]

pub fn duplicate_odd(self) -> Self[src]

Duplicate odd indexed lanes into a new m128.

out[0]= self[1]
out[1]= self[1]
out[2]= self[3]
out[3]= self[3]

pub fn duplicate_even(self) -> Self[src]

Duplicate even indexed lanes into a new m128.

out[0]= self[0]
out[1]= self[0]
out[2]= self[2]
out[3]= self[2]

Trait Implementations

impl Zeroable for m128[src]

fn zeroed() -> Self[src]

Calls zeroed. Read more

impl Pod for m128[src]

impl Debug for m128[src]

fn fmt(&self, f: &mut Formatter) -> Result[src]

Debug formats in offset order.

All Formatter information is passed directly to each individual f32 lane being formatted.

impl Display for m128[src]

fn fmt(&self, f: &mut Formatter) -> Result[src]

Display formats in offset order.

All Formatter information is passed directly to each individual f32 lane being formatted.

impl LowerExp for m128[src]

fn fmt(&self, f: &mut Formatter) -> Result[src]

LowerExp formats in offset order.

All Formatter information is passed directly to each individual f32 lane being formatted.

impl UpperExp for m128[src]

fn fmt(&self, f: &mut Formatter) -> Result[src]

UpperExp formats in offset order.

All Formatter information is passed directly to each individual f32 lane being formatted.

impl Add<m128> for m128[src]

type Output = Self

The resulting type after applying the + operator.

fn add(self, rhs: Self) -> Self[src]

Lanewise addition.

impl Sub<m128> for m128[src]

type Output = Self

The resulting type after applying the - operator.

fn sub(self, rhs: Self) -> Self[src]

Lanewise subtraction.

impl Mul<m128> for m128[src]

type Output = Self

The resulting type after applying the * operator.

fn mul(self, rhs: Self) -> Self[src]

Lanewise multiplication.

impl Div<m128> for m128[src]

type Output = Self

The resulting type after applying the / operator.

fn div(self, rhs: Self) -> Self[src]

Lanewise division.

impl Neg for m128[src]

type Output = Self

The resulting type after applying the - operator.

fn neg(self) -> Self[src]

Lanewise 0.0 - self

impl AddAssign<m128> for m128[src]

fn add_assign(&mut self, rhs: Self)[src]

Lanewise addition.

impl SubAssign<m128> for m128[src]

fn sub_assign(&mut self, rhs: Self)[src]

Lanewise subtraction.

impl MulAssign<m128> for m128[src]

fn mul_assign(&mut self, rhs: Self)[src]

Lanewise multiplication.

impl DivAssign<m128> for m128[src]

fn div_assign(&mut self, rhs: Self)[src]

Lanewise division.

impl Not for m128[src]

type Output = Self

The resulting type after applying the ! operator.

fn not(self) -> Self[src]

Bitwise negation

impl BitAnd<m128> for m128[src]

type Output = Self

The resulting type after applying the & operator.

fn bitand(self, rhs: Self) -> Self[src]

Bitwise AND.

impl BitOr<m128> for m128[src]

type Output = Self

The resulting type after applying the | operator.

fn bitor(self, rhs: Self) -> Self[src]

Bitwise OR.

impl BitXor<m128> for m128[src]

type Output = Self

The resulting type after applying the ^ operator.

fn bitxor(self, rhs: Self) -> Self[src]

Bitwise XOR.

impl BitAndAssign<m128> for m128[src]

fn bitand_assign(&mut self, rhs: Self)[src]

Bitwise AND.

impl BitOrAssign<m128> for m128[src]

fn bitor_assign(&mut self, rhs: Self)[src]

Bitwise OR.

impl BitXorAssign<m128> for m128[src]

fn bitxor_assign(&mut self, rhs: Self)[src]

Bitwise XOR.

impl Copy for m128[src]

impl Clone for m128[src]

fn clone_from(&mut self, source: &Self)1.0.0[src]

Performs copy-assignment from source. Read more

Auto Trait Implementations

impl Unpin for m128

impl Send for m128

impl Sync for m128

Blanket Implementations

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T> From<T> for T[src]

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> Any for T where
    T: 'static + ?Sized
[src]