//! Traits for SIMD operations.
use std::ops::{Add, Sub, Mul, Div, Neg, BitAnd, Deref};
pub trait SimdF32: Sized + Copy + Clone
+ Add<Self, Output=Self> + Add<f32, Output=Self>
+ Sub<Self, Output=Self> + Sub<f32, Output=Self>
+ Mul<Self, Output=Self> + Mul<f32, Output=Self>
+ Div<Self, Output=Self> + Mul<f32, Output=Self>
+ Neg<Output=Self>
// The following would be convenient but run into limitations in the Rust
// type system, which might be fixed at some point.
// See Rust issue #23856 and a large number of related ones.
/*
where f32: Add<Self, Output=Self>,
f32: Sub<Self, Output=Self>,
f32: Mul<Self, Output=Self>,
*/
{
type Raw: From<Self>;
type Mask: SimdMask32<F32 = Self>;
// Maybe doesn't need self?
fn width(self) -> usize;
/// Returns the largest integer less than or equal to a number.
fn floor(self) -> Self;
/// Returns the smallest integer greater than or equal to a number.
fn ceil(self) -> Self;
/// Round a float to the nearest integer.
///
/// The behavior on a tie is unspecified, and will be whatever is
/// fastest on a given implementation. The ideal behavior is to round
/// to the nearest even integer on tie; note that this is different
/// than `f32::round`.
///
/// See https://github.com/rust-lang/rust/issues/55107 for discussion.
fn round(self) -> Self;
/// Returns the absolute value of a number.
fn abs(self) -> Self;
/// Minimum of two values.
fn min(self, other: Self) -> Self;
/// Maximum of two values.
fn max(self, other: Self) -> Self;
/// Compute approximate reciprocal, to 8 bits of precision.
fn recip8(self) -> Self { self.recip11() }
/// Compute approximate reciprocal, to 11 bits of precision.
fn recip11(self) -> Self { self.recip14() }
/// Compute approximate reciprocal, to 14 bits of precision.
fn recip14(self) -> Self { self.recip16() }
/// Compute approximate reciprocal, to 16 bits of precision.
fn recip16(self) -> Self { self.recip22() }
/// Compute approximate reciprocal, to 22 bits of precision.
fn recip22(self) -> Self { self.recip() }
/// Compute reciprocal, to IEEE precision standards.
fn recip(self) -> Self;
/// Compute approximate reciprocal square root, to 8 bits of precision.
fn rsqrt8(self) -> Self { self.rsqrt11() }
/// Compute approximate reciprocal square root, to 11 bits of precision.
fn rsqrt11(self) -> Self { self.rsqrt14() }
/// Compute approximate reciprocal square root, to 14 bits of precision.
fn rsqrt14(self) -> Self { self.rsqrt16() }
/// Compute approximate reciprocal square root, to 16 bits of precision.
fn rsqrt16(self) -> Self { self.rsqrt22() }
/// Compute approximate reciprocal square root, to 22 bits of precision.
fn rsqrt22(self) -> Self { self.rsqrt() }
/// Compute reciprocal square root, to IEEE precision standards.
fn rsqrt(self) -> Self;
/// Repeat a scalar in all lanes.
///
/// Note: self is unused but is needed for safety.
fn splat(self, x: f32) -> Self;
/// Create SIMD that contains the lane number.
///
/// For example, for 4 lanes, it is [0.0, 1.0, 2.0, 3.0].
///
/// Note: self is unused but is needed for safety.
fn steps(self) -> Self;
/// Create from a raw value. Marked as unsafe because it requires that the
/// corresponding target_feature is enabled.
unsafe fn from_raw(raw: Self::Raw) -> Self;
unsafe fn load(p: *const f32) -> Self;
/// Load from a slice.
///
/// # Panics
///
/// If `slice.len() < Self::width()`.
///
/// Note: self is unused but is needed for safety.
fn from_slice(self, slice: &[f32]) -> Self {
unsafe {
assert!(slice.len() >= self.width());
Self::load(slice.as_ptr())
}
}
unsafe fn store(self, p: *mut f32);
/// Write into a slice.
///
/// # Panics
///
/// If `slice.len() < Self::width()`.
///
/// Note: self is unused but is needed for safety.
fn write_to_slice(self, slice: &mut [f32]) {
unsafe {
assert!(slice.len() >= self.width());
self.store(slice.as_mut_ptr());
}
}
/// Create an instance (zero but value is usually ignored). Marked
/// as unsafe because it requires that the corresponding target_feature
/// is enabled.
unsafe fn create() -> Self;
fn eq(self, other: Self) -> Self::Mask;
// TODO: other comparisons
}
/// A type compatible with an f32 simd value, representing a boolean in each lane.
///
/// Masks can be combined with `&` (the `BitAnd` supertrait) and converted
/// into their raw representation through `From`.
pub trait SimdMask32: Sized + Copy + Clone
    + BitAnd<Self, Output=Self>
{
    /// The raw representation this mask converts into.
    ///
    /// The `From<Self>` requirement is stated on the associated type rather
    /// than in a `where` clause on the trait, so generic code bounded by
    /// `M: SimdMask32` can use the conversion without restating the bound.
    type Raw: From<Self>;

    /// The corresponding compatible f32 type (with the same width).
    type F32: SimdF32<Mask = Self>;

    /// Select an element from `a` where the mask is true, and from `b`
    /// otherwise.
    fn select(self, a: Self::F32, b: Self::F32) -> Self::F32;
}
/// A SIMD vector of four `f32` values that dereferences to `[f32; 4]`.
///
/// As with the other SIMD traits in this file, `new` takes an otherwise
/// unused `self` for safety, while the constructors without `self` are
/// `unsafe` because they require the corresponding `target_feature`.
pub trait F32x4: Sized + Copy + Clone
    + Add<Self, Output=Self>
    + Mul + Mul<f32, Output=Self>
    + Deref<Target=[f32; 4]>
    // Again bitten by Rust #23856: ideally this would also require
    //
    //     [f32; 4]: From<Self>,
{
    /// The raw representation this vector converts into.
    ///
    /// The `From<Self>` requirement is stated on the associated type rather
    /// than in a `where` clause on the trait, so generic code bounded by
    /// `T: F32x4` can use the conversion without restating the bound.
    type Raw: From<Self>;

    /// Create an instance (zero but value is usually ignored).
    ///
    /// # Safety
    ///
    /// The corresponding target_feature must be enabled.
    unsafe fn create() -> Self;

    /// Create from a raw value.
    ///
    /// # Safety
    ///
    /// The corresponding target_feature must be enabled.
    unsafe fn from_raw(raw: Self::Raw) -> Self;

    /// Create a vector from an array of four values.
    ///
    /// Note: self is unused but is needed for safety.
    fn new(self, array: [f32; 4]) -> Self;

    /// Return the four values as an array.
    // This is probably not needed since we have deref.
    fn as_vec(self) -> [f32; 4];
}