1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
//! Traits for SIMD operations.

use std::ops::{Add, Sub, Mul, Div, Neg, BitAnd, Deref};

/// A SIMD vector of `f32` lanes.
///
/// Implementors support arithmetic against another vector (`+`, `-`, `*`,
/// `/`), against a scalar `f32` on the right-hand side (`+`, `-`, `*`), and
/// negation.
///
/// Several methods take a `self` argument that they do not otherwise need;
/// as noted on those methods, this is needed for safety. The only ways to
/// obtain a first value are the `unsafe` constructors, which require that
/// the corresponding target_feature is enabled.
pub trait SimdF32: Sized + Copy + Clone
    + Add<Self, Output=Self> + Add<f32, Output=Self>
    + Sub<Self, Output=Self> + Sub<f32, Output=Self>
    + Mul<Self, Output=Self> + Mul<f32, Output=Self>
    // NOTE(review): this line used to repeat `Mul<f32, Output=Self>`, which
    // is redundant. `Div<f32, Output=Self>` was presumably intended, but
    // requiring it would break existing implementors, so it is not added.
    + Div<Self, Output=Self>
    + Neg<Output=Self>
    // The following would be convenient but run into limitations in the Rust
    // type system, which might be fixed at some point.
    // See Rust issue #23856 and a large number of related ones.
    /*
    where f32: Add<Self, Output=Self>,
    f32: Sub<Self, Output=Self>,
    f32: Mul<Self, Output=Self>,
    */
{
    /// The underlying raw representation; obtainable from `Self` via `From`.
    type Raw: From<Self>;

    /// The mask type paired with this vector type.
    type Mask: SimdMask32<F32 = Self>;

    /// Number of `f32` lanes in the vector.
    // Maybe doesn't need self?
    fn width(self) -> usize;

    /// Returns the largest integer less than or equal to a number.
    fn floor(self) -> Self;

    /// Returns the smallest integer greater than or equal to a number.
    fn ceil(self) -> Self;

    /// Round a float to the nearest integer.
    ///
    /// The behavior on a tie is unspecified, and will be whatever is
    /// fastest on a given implementation. The ideal behavior is to round
    /// to the nearest even integer on tie; note that this is different
    /// than `f32::round`.
    ///
    /// See <https://github.com/rust-lang/rust/issues/55107> for discussion.
    fn round(self) -> Self;

    /// Returns the absolute value of a number.
    fn abs(self) -> Self;

    /// Minimum of two values.
    fn min(self, other: Self) -> Self;

    /// Maximum of two values.
    fn max(self, other: Self) -> Self;

    // Each approximate reciprocal defaults to the next more precise variant,
    // ending at the exact `recip`. An implementation therefore only needs to
    // override the precisions for which it has a faster routine.

    /// Compute approximate reciprocal, to 8 bits of precision.
    fn recip8(self) -> Self { self.recip11() }

    /// Compute approximate reciprocal, to 11 bits of precision.
    fn recip11(self) -> Self { self.recip14() }

    /// Compute approximate reciprocal, to 14 bits of precision.
    fn recip14(self) -> Self { self.recip16() }

    /// Compute approximate reciprocal, to 16 bits of precision.
    fn recip16(self) -> Self { self.recip22() }

    /// Compute approximate reciprocal, to 22 bits of precision.
    fn recip22(self) -> Self { self.recip() }

    /// Compute reciprocal, to IEEE precision standards.
    fn recip(self) -> Self;

    // The reciprocal square roots use the same fall-through scheme as the
    // reciprocals above, ending at the exact `rsqrt`.

    /// Compute approximate reciprocal square root, to 8 bits of precision.
    fn rsqrt8(self) -> Self { self.rsqrt11() }

    /// Compute approximate reciprocal square root, to 11 bits of precision.
    fn rsqrt11(self) -> Self { self.rsqrt14() }

    /// Compute approximate reciprocal square root, to 14 bits of precision.
    fn rsqrt14(self) -> Self { self.rsqrt16() }

    /// Compute approximate reciprocal square root, to 16 bits of precision.
    fn rsqrt16(self) -> Self { self.rsqrt22() }

    /// Compute approximate reciprocal square root, to 22 bits of precision.
    fn rsqrt22(self) -> Self { self.rsqrt() }

    /// Compute reciprocal square root, to IEEE precision standards.
    fn rsqrt(self) -> Self;

    /// Repeat a scalar in all lanes.
    ///
    /// Note: self is unused but is needed for safety.
    fn splat(self, x: f32) -> Self;

    /// Create SIMD that contains the lane number.
    ///
    /// For example, for 4 lanes, it is [0.0, 1.0, 2.0, 3.0].
    ///
    /// Note: self is unused but is needed for safety.
    fn steps(self) -> Self;

    /// Create from a raw value.
    ///
    /// # Safety
    ///
    /// Marked as unsafe because it requires that the corresponding
    /// target_feature is enabled.
    unsafe fn from_raw(raw: Self::Raw) -> Self;

    /// Load a vector from the memory starting at `p`.
    ///
    /// # Safety
    ///
    /// `from_slice` calls this with a pointer that is only guaranteed to be
    /// `f32`-aligned and to have `width()` readable elements behind it, so
    /// implementations must not require more than that.
    ///
    /// NOTE(review): presumably the corresponding target_feature must also be
    /// enabled, as for `create` and `from_raw` -- confirm with implementors.
    unsafe fn load(p: *const f32) -> Self;

    /// Load from a slice.
    ///
    /// `self` supplies the lane count for the length check; its lane values
    /// are not read here.
    ///
    /// # Panics
    ///
    /// If `slice.len() < self.width()`.
    fn from_slice(self, slice: &[f32]) -> Self {
        assert!(slice.len() >= self.width());
        // SAFETY: the assertion above guarantees at least `width()` readable
        // elements behind the pointer, which is all `load` may rely on.
        unsafe { Self::load(slice.as_ptr()) }
    }

    /// Store the vector to the memory starting at `p`.
    ///
    /// # Safety
    ///
    /// `write_to_slice` calls this with a pointer that is only guaranteed to
    /// be `f32`-aligned and to have `width()` writable elements behind it, so
    /// implementations must not require more than that.
    unsafe fn store(self, p: *mut f32);

    /// Write into a slice.
    ///
    /// # Panics
    ///
    /// If `slice.len() < self.width()`.
    fn write_to_slice(self, slice: &mut [f32]) {
        assert!(slice.len() >= self.width());
        // SAFETY: the assertion above guarantees at least `width()` writable
        // elements behind the pointer, which is all `store` may rely on.
        unsafe { self.store(slice.as_mut_ptr()) }
    }

    /// Create an instance (zero but value is usually ignored).
    ///
    /// # Safety
    ///
    /// Marked as unsafe because it requires that the corresponding
    /// target_feature is enabled.
    unsafe fn create() -> Self;

    /// Compare `self` with `other` for equality, returning the result as a
    /// mask.
    fn eq(self, other: Self) -> Self::Mask;

    // TODO: other comparisons
}

/// A type compatible with an f32 simd value, representing a boolean in each lane.
///
/// Masks can be combined with `&` (`BitAnd`).
pub trait SimdMask32: Sized + Copy + Clone
    + BitAnd<Self, Output=Self>
{
    /// The underlying raw representation; obtainable from `Self` via `From`.
    ///
    /// The bound is declared on the associated type, as for `SimdF32::Raw`,
    /// rather than in a `where` clause on the trait. A trait-level `where`
    /// clause is not implied for generic users of the trait and would have
    /// to be restated at every use site.
    type Raw: From<Self>;

    /// The corresponding compatible f32 type (with the same width).
    type F32: SimdF32<Mask = Self>;

    /// Select an element from `a` where the mask is true, and from `b`
    /// otherwise.
    fn select(self, a: Self::F32, b: Self::F32) -> Self::F32;
}

/// A SIMD vector of four `f32` lanes.
///
/// Implementors support addition and multiplication with another vector,
/// multiplication by a scalar `f32`, and dereference to `[f32; 4]` for
/// read access to the lanes.
///
/// NOTE(review): the bare `Mul` bound leaves `Output` unconstrained, unlike
/// `Add<Self, Output=Self>`. `Mul<Self, Output=Self>` was presumably
/// intended; it is left as-is to avoid tightening the contract.
pub trait F32x4: Sized + Copy + Clone
    + Add<Self, Output=Self>
    + Mul + Mul<f32, Output=Self>
    + Deref<Target=[f32; 4]>
    // Again bitten by Rust #23856.
    /*
    where [f32; 4]: From<Self>,
    */
{
    /// The underlying raw representation; obtainable from `Self` via `From`.
    ///
    /// The bound is declared on the associated type rather than in a
    /// `where` clause on the trait. A trait-level `where` clause is not
    /// implied for generic users of the trait and would have to be restated
    /// at every use site.
    type Raw: From<Self>;

    /// Create an instance (zero but value is usually ignored).
    ///
    /// # Safety
    ///
    /// Marked as unsafe because it requires that the corresponding
    /// target_feature is enabled.
    unsafe fn create() -> Self;

    /// Create from a raw value.
    ///
    /// # Safety
    ///
    /// Marked as unsafe because it requires that the corresponding
    /// target_feature is enabled.
    unsafe fn from_raw(raw: Self::Raw) -> Self;

    /// Create a vector from an array of four lane values.
    ///
    /// Note: self is unused but is needed for safety.
    fn new(self, array: [f32; 4]) -> Self;

    /// Return the lanes as an array.
    // This is probably not needed since we have deref.
    fn as_vec(self) -> [f32; 4];
}