core_models/core_arch/
x86.rs

1//! A (partial) Rust-based model of [`core::arch::x86`] and [`core::arch::x86_64`].
2//!
3//! This module provides a purely Rust implementation of selected operations from
4//! `core::arch::x86` and `core::arch::x86_64`.
5//!
6//! # Guide — Adding a New Intrinsic to the `core_arch::x86` Model
7//!
8//! ## Introduction — Why & How We Model SIMD
9//!
10//! In order to verify code that uses SIMD, every intrinsic has to be modeled.  
11//! Your crate already provides the scaffolding:
12//!
13//! * **`src/core_arch/x86/opaque.rs`** — list of **empty** intrinsics (stubs).  
14//! * **`src/core_arch/x86/{sse2,avx,avx2,…}.rs`** — where *real* models belong.
15//!
16//! This guide shows an end‑to‑end path:
17//!
18//! 1. **Locate** the intrinsic in upstream docs.  
19//! 2. **Pick a representation** (bit‑vector, integer vector, or both).  
20//! 3. **Move** the item out of `opaque.rs` (if it is defined here).  
21//! 4. **Implement** the model and (optionally) an interpretation with a lift lemma.  
22//! 5. **Test**.  
23//!
24//! ## Choosing Your Semantic Representation
25//!
26//! | Representation | Good for | Extra work |
27//! |----------------|----------|------------|
28//! | **Bit‑vector** (`BitVec<256>`) | Bit-wise proofs, shuffles, masking | None |
29//! | **Integer vector** (`i32x8`, `i64x4`, …) | Lane‑wise integer reasoning | Add lift lemma |
30//!
31//! ## Driving Example – `_mm256_mul_epi32`
32//!
33//! We add the AVX2 "multiply packed 32‑bit integers" intrinsic.
34//!
35//!  1. *Locate* in <https://doc.rust-lang.org/stable/core/arch/x86/> → click **Source**. Path shows `core_arch/src/x86/avx2.rs`: this intrinsics needs to be added to the  `avx2` sub module of `x86.rs`.
36//!  2. **Delete the stub** from `opaque.rs`
37//!  3. **Implement** the bit‑vector spec. (file `x86.rs`, module `avx2`)
38//!  4. *(Optional)* add an `i32x8` interpretation + lift lemma. (file `x86/interpretations.rs`)
39//!  5. **Unit‑test** equivalence.
40//!
41//! ### (step 3) Bit‑Vector Model (if needed)
42//! In the case of `_mm256_mul_epi32` you probably don't want to have a bit vec model. You would have to write a bit vec addition primitive first.
43//! In this case, we just declare an opaque `_mm256_mul_epi32` in `x86::avx2`.
44//!
45//! ```compile_fail
46//! #[hax_lib::opaque]
47//! pub fn _mm256_mul_epi32(_: __m256i, _: __m256i) -> __m256i {
48//!     unimplemented!()
49//! }
50//! ```
51//!
52//! ### (step 4) Integer‑Vector Interpretation & Lift Lemma (if needed)
53//! In `minicore::core_arch::x86::interpretations::int_vec`, we add the following model:
54//!
55//! ```compile_fail
56//! pub fn _mm256_mul_epi32(x: i32x8, y: i32x8) -> i64x4 {
57//!     i64x4::from_fn(|i| (x[i * 2] as i64) * (y[i * 2] as i64))
58//! }
59//! ```
60//!
61//! And a lift lemma in `minicore::core_arch::x86::interpretations::int_vec::lemmas`:
62//! ```compile_fail
63//! mk_lift_lemma!(
64//!     _mm256_mul_epi32(x: __m256i, y: __m256i) ==
65//!     __m256i::from(super::_mm256_mul_epi32(super::i32x8::from(x), super::i32x8::from(y)))
66//! );
67//! ```
68//!
69//! ### (step 5) Unit Test
70//! In `minicore::core_arch::x86::interpretations::int_vec::tests`:
71//! ```compile_fail
72//! mk!(_mm256_mul_epi32(x: BitVec, y: BitVec));
73//! ```
74//!
75//! `mk!` will create a test function that tests that our model of `_mm256_mul_epi32` and its original definition are equivalent for 1000 random values of `x` and `y`.
76//!
77#![allow(clippy::too_many_arguments)]
78
79pub mod interpretations;
80use crate::abstractions::{bit::*, bitvec::*, funarr::*};
81
82pub(crate) mod upstream {
83    #[cfg(target_arch = "x86")]
84    pub use core::arch::x86::*;
85    #[cfg(target_arch = "x86_64")]
86    pub use core::arch::x86_64::*;
87}
88
89/// Conversions impls between `BitVec<N>` and `__mNi` types.
90#[hax_lib::exclude]
91#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
92mod conversions {
93    use super::upstream::{
94        __m128i, __m256, __m256i, _mm256_castps_si256, _mm256_castsi256_ps, _mm256_loadu_si256,
95        _mm256_storeu_si256, _mm_loadu_si128, _mm_storeu_si128,
96    };
97    use super::BitVec;
98
99    impl From<BitVec<256>> for __m256i {
100        fn from(bv: BitVec<256>) -> __m256i {
101            let bv: &[u8] = &bv.to_vec()[..];
102            unsafe { _mm256_loadu_si256(bv.as_ptr() as *const _) }
103        }
104    }
105    impl From<BitVec<256>> for __m256 {
106        fn from(bv: BitVec<256>) -> __m256 {
107            let bv: &[u8] = &bv.to_vec()[..];
108            unsafe { _mm256_castsi256_ps(_mm256_loadu_si256(bv.as_ptr() as *const _)) }
109        }
110    }
111
112    impl From<BitVec<128>> for __m128i {
113        fn from(bv: BitVec<128>) -> __m128i {
114            let slice: &[u8] = &bv.to_vec()[..];
115            unsafe { _mm_loadu_si128(slice.as_ptr() as *const __m128i) }
116        }
117    }
118
119    impl From<__m256i> for BitVec<256> {
120        fn from(vec: __m256i) -> BitVec<256> {
121            let mut v = [0u8; 32];
122            unsafe {
123                _mm256_storeu_si256(v.as_mut_ptr() as *mut _, vec);
124            }
125            BitVec::from_slice(&v[..], 8)
126        }
127    }
128
129    impl From<__m256> for BitVec<256> {
130        fn from(vec: __m256) -> BitVec<256> {
131            let mut v = [0u8; 32];
132            unsafe {
133                _mm256_storeu_si256(v.as_mut_ptr() as *mut _, _mm256_castps_si256(vec));
134            }
135            BitVec::from_slice(&v[..], 8)
136        }
137    }
138
139    impl From<__m128i> for BitVec<128> {
140        fn from(vec: __m128i) -> BitVec<128> {
141            let mut v = [0u8; 16];
142            unsafe {
143                _mm_storeu_si128(v.as_mut_ptr() as *mut _, vec);
144            }
145            BitVec::from_slice(&v[..], 8)
146        }
147    }
148}
149
150#[hax_lib::fstar::replace(
151    r#"
152    unfold type t_e_ee_m256i = $:{__m256i}
153    unfold type t_e_ee_m128i = $:{__m128i}
154"#
155)]
156const _: () = {};
157
158/// 256-bit wide integer vector type.
159/// Models `core::arch::x86::__m256i` or `core::arch::x86_64::__m256i` (the __m256i type defined by Intel, representing a 256-bit SIMD register).
160#[allow(non_camel_case_types)]
161pub type __m256i = BitVec<256>;
162
163/// 128-bit wide integer vector type.
164/// Models `core::arch::x86::__m128i` or `core::arch::x86_64::__m128i` (the __m128i type defined by Intel, representing a 128-bit SIMD register).
165#[allow(non_camel_case_types)]
166pub type __m128i = BitVec<128>;
167
168/// 256-bit wide vector type, which can be interpreted as 8 32 bit floating point values.
169/// Models `core::arch::x86::__m256` or `core::arch::x86_64::__m256`, but since we do not have use and fully support floating points yet, it is treated the same as __m256i.
170#[allow(non_camel_case_types)]
171pub type __m256 = __m256i;
172
173pub use ssse3::*;
174pub mod ssse3 {
175    use super::*;
176    #[hax_lib::opaque]
177    pub fn _mm_shuffle_epi8(vector: __m128i, indexes: __m128i) -> __m128i {
178        let indexes = indexes.to_vec().try_into().unwrap();
179        extra::mm_shuffle_epi8_u8_array(vector, indexes)
180    }
181}
182pub use sse2::*;
183pub mod sse2 {
184    /// This intrinsics is not extracted via hax currently since it cannot hanlde raw pointers.
185    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128&ig_expand=4106)
186    #[hax_lib::exclude]
187    pub unsafe fn _mm_loadu_si128(_mem_addr: *const __m128i) -> __m128i {
188        unimplemented!()
189    }
190
191    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16)
192    #[hax_lib::opaque]
193    pub fn _mm_packs_epi16(_: __m128i, _: __m128i) -> __m128i {
194        unimplemented!()
195    }
196
197    use super::*;
198    #[hax_lib::opaque]
199    pub fn _mm_set_epi8(
200        _e15: i8,
201        _e14: i8,
202        _e13: i8,
203        _e12: i8,
204        _e11: i8,
205        _e10: i8,
206        _e9: i8,
207        _e8: i8,
208        _e7: i8,
209        _e6: i8,
210        _e5: i8,
211        _e4: i8,
212        _e3: i8,
213        _e2: i8,
214        _e1: i8,
215        _e0: i8,
216    ) -> __m128i {
217        todo!()
218    }
219
220    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
221    #[hax_lib::opaque]
222    pub fn _mm_set1_epi16(_: i16) -> __m128i {
223        unimplemented!()
224    }
225    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
226    #[hax_lib::opaque]
227    pub fn _mm_set_epi32(_: i32, _: i32, _: i32, _: i32) -> __m128i {
228        unimplemented!()
229    }
230
231    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
232    #[hax_lib::opaque]
233    pub fn _mm_add_epi16(_: __m128i, _: __m128i) -> __m128i {
234        unimplemented!()
235    }
236    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
237    #[hax_lib::opaque]
238    pub fn _mm_sub_epi16(_: __m128i, _: __m128i) -> __m128i {
239        unimplemented!()
240    }
241    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
242    #[hax_lib::opaque]
243    pub fn _mm_mullo_epi16(_: __m128i, _: __m128i) -> __m128i {
244        unimplemented!()
245    }
246
247    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
248    #[hax_lib::opaque]
249    pub fn _mm_mulhi_epi16(_: __m128i, _: __m128i) -> __m128i {
250        unimplemented!()
251    }
252    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
253    #[hax_lib::opaque]
254    pub fn _mm_srli_epi64<const IMM8: i32>(_: __m128i) -> __m128i {
255        unimplemented!()
256    }
257
258    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8)
259    #[hax_lib::opaque]
260    pub fn _mm_movemask_epi8(_: __m128i) -> i32 {
261        unimplemented!()
262    }
263}
264
265pub use avx::*;
266pub mod avx {
267    /// This intrinsics is not extracted via hax currently since it cannot hanlde raw pointers.
268    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_si256)
269    #[hax_lib::exclude]
270    pub unsafe fn _mm256_storeu_si256(_mem_addr: *mut __m256i, _a: __m256i) {
271        unimplemented!()
272    }
273
274    /// This intrinsics is not extracted via hax currently since it cannot hanlde raw pointers.
275    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_si256)
276    #[hax_lib::exclude]
277    pub unsafe fn _mm256_loadu_si256(_mem_addr: *const __m256i) -> __m256i {
278        unimplemented!()
279    }
280
281    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi64x)
282    #[hax_lib::opaque]
283    pub fn _mm256_set1_epi64x(_: i64) -> __m256i {
284        unimplemented!()
285    }
286    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi64x)
287    #[hax_lib::opaque]
288    pub fn _mm256_set_epi64x(_: i64, _: i64, _: i64, _: i64) -> __m256i {
289        unimplemented!()
290    }
291    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_ps)
292    #[hax_lib::opaque]
293    pub fn _mm256_blendv_ps(_: __m256, _: __m256, _: __m256) -> __m256 {
294        unimplemented!()
295    }
296    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi128_si256)
297    #[hax_lib::opaque]
298    pub fn _mm256_castsi128_si256(_: __m128i) -> __m256i {
299        unimplemented!()
300    }
301    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256)
302    #[hax_lib::opaque]
303    pub fn _mm256_testz_si256(_: __m256i, _: __m256i) -> i32 {
304        unimplemented!()
305    }
306    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps)
307    #[hax_lib::opaque]
308    pub fn _mm256_castsi256_ps(_: __m256i) -> __m256 {
309        unimplemented!()
310    }
311    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_si256)
312    #[hax_lib::opaque]
313    pub fn _mm256_castps_si256(_: __m256) -> __m256i {
314        unimplemented!()
315    }
316    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_ps)
317    #[hax_lib::opaque]
318    pub fn _mm256_movemask_ps(_: __m256) -> i32 {
319        unimplemented!()
320    }
321
322    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_si256)
323    #[hax_lib::opaque]
324    pub fn _mm256_setzero_si256() -> __m256i {
325        BitVec::from_fn(|_| Bit::Zero)
326    }
327
328    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128i)
329    #[hax_lib::opaque]
330    pub fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
331        BitVec::from_fn(|i| if i < 128 { lo[i] } else { hi[i - 128] })
332    }
333
334    pub use super::*;
335    pub fn _mm256_castsi256_si128(vector: __m256i) -> __m128i {
336        BitVec::from_fn(|i| vector[i])
337    }
338
339    /// This is opaque to Hax: it is defined only via the integer
340    /// interpretation. See `interpretations::int_vec::_mm256_set1_epi32`.
341    #[hax_lib::opaque]
342    pub fn _mm256_set1_epi32(_: i32) -> __m256i {
343        unimplemented!()
344    }
345
346    /// This is opaque to Hax: we have lemmas about this intrinsics
347    /// composed with others. See e.g. `_rw_mm256_sllv_epi32`.
348    #[hax_lib::opaque]
349    pub fn _mm256_set_epi32(
350        _e0: i32,
351        _e1: i32,
352        _e2: i32,
353        _e3: i32,
354        _e4: i32,
355        _e5: i32,
356        _e6: i32,
357        _e7: i32,
358    ) -> __m256i {
359        todo!()
360    }
361
362    /// This is opaque to Hax: we have lemmas about this intrinsics
363    /// composed with others. See e.g. `_rw_mm256_mullo_epi16_shifts`.
364    #[hax_lib::opaque]
365    pub fn _mm256_set_epi16(
366        _e00: i16,
367        _e01: i16,
368        _e02: i16,
369        _e03: i16,
370        _e04: i16,
371        _e05: i16,
372        _e06: i16,
373        _e07: i16,
374        _e08: i16,
375        _e09: i16,
376        _e10: i16,
377        _e11: i16,
378        _e12: i16,
379        _e13: i16,
380        _e14: i16,
381        _e15: i16,
382    ) -> __m256i {
383        todo!()
384    }
385
386    /// This is opaque to Hax: we have lemmas about this intrinsics
387    /// composed with others. See e.g. `_rw_mm256_shuffle_epi8`.
388    #[hax_lib::opaque]
389    pub fn _mm256_set_epi8(
390        _e00: i8,
391        _e01: i8,
392        _e02: i8,
393        _e03: i8,
394        _e04: i8,
395        _e05: i8,
396        _e06: i8,
397        _e07: i8,
398        _e08: i8,
399        _e09: i8,
400        _e10: i8,
401        _e11: i8,
402        _e12: i8,
403        _e13: i8,
404        _e14: i8,
405        _e15: i8,
406        _e16: i8,
407        _e17: i8,
408        _e18: i8,
409        _e19: i8,
410        _e20: i8,
411        _e21: i8,
412        _e22: i8,
413        _e23: i8,
414        _e24: i8,
415        _e25: i8,
416        _e26: i8,
417        _e27: i8,
418        _e28: i8,
419        _e29: i8,
420        _e30: i8,
421        _e31: i8,
422    ) -> __m256i {
423        todo!()
424    }
425    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi16)
426    #[hax_lib::opaque]
427    pub fn _mm256_set1_epi16(_: i16) -> __m256i {
428        unimplemented!()
429    }
430}
431pub use avx2::*;
432pub mod avx2 {
433    use super::*;
434
435    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi32)
436    #[hax_lib::opaque]
437    pub fn _mm256_blend_epi32<const IMM8: i32>(_: __m256i, _: __m256i) -> __m256i {
438        unimplemented!()
439    }
440    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi32)
441    #[hax_lib::opaque]
442    pub fn _mm256_shuffle_epi32<const MASK: i32>(_: __m256i) -> __m256i {
443        unimplemented!()
444    }
445    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi32)
446    #[hax_lib::opaque]
447    pub fn _mm256_sub_epi32(_: __m256i, _: __m256i) -> __m256i {
448        unimplemented!()
449    }
450    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epi32)
451    #[hax_lib::opaque]
452    pub fn _mm256_mul_epi32(_: __m256i, _: __m256i) -> __m256i {
453        unimplemented!()
454    }
455
456    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi16)
457    #[hax_lib::opaque]
458    pub fn _mm256_add_epi16(_: __m256i, _: __m256i) -> __m256i {
459        unimplemented!()
460    }
461    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16)
462    #[hax_lib::opaque]
463    pub fn _mm256_madd_epi16(_: __m256i, _: __m256i) -> __m256i {
464        unimplemented!()
465    }
466    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi32)
467    #[hax_lib::opaque]
468    pub fn _mm256_add_epi32(_: __m256i, _: __m256i) -> __m256i {
469        unimplemented!()
470    }
471    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64)
472    #[hax_lib::opaque]
473    pub fn _mm256_add_epi64(_: __m256i, _: __m256i) -> __m256i {
474        unimplemented!()
475    }
476    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi32)
477    #[hax_lib::opaque]
478    pub fn _mm256_abs_epi32(_: __m256i) -> __m256i {
479        unimplemented!()
480    }
481    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)
482    #[hax_lib::opaque]
483    pub fn _mm256_sub_epi16(_: __m256i, _: __m256i) -> __m256i {
484        unimplemented!()
485    }
486
487    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16)
488    #[hax_lib::opaque]
489    pub fn _mm256_cmpgt_epi16(_: __m256i, _: __m256i) -> __m256i {
490        unimplemented!()
491    }
492    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32)
493    #[hax_lib::opaque]
494    pub fn _mm256_cmpgt_epi32(_: __m256i, _: __m256i) -> __m256i {
495        unimplemented!()
496    }
497    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32)
498    #[hax_lib::opaque]
499    pub fn _mm256_cmpeq_epi32(_: __m256i, _: __m256i) -> __m256i {
500        unimplemented!()
501    }
502    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi32)
503    #[hax_lib::opaque]
504    pub fn _mm256_sign_epi32(_: __m256i, _: __m256i) -> __m256i {
505        unimplemented!()
506    }
507
508    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi32)
509    #[hax_lib::opaque]
510    pub fn _mm256_mullo_epi32(_: __m256i, _: __m256i) -> __m256i {
511        unimplemented!()
512    }
513    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epi16)
514    #[hax_lib::opaque]
515    pub fn _mm256_mulhi_epi16(_: __m256i, _: __m256i) -> __m256i {
516        unimplemented!()
517    }
518    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epu32)
519    #[hax_lib::opaque]
520    pub fn _mm256_mul_epu32(_: __m256i, _: __m256i) -> __m256i {
521        unimplemented!()
522    }
523    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_si256)
524    #[hax_lib::opaque]
525    pub fn _mm256_and_si256(_: __m256i, _: __m256i) -> __m256i {
526        unimplemented!()
527    }
528    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_si256)
529    #[hax_lib::opaque]
530    pub fn _mm256_or_si256(_: __m256i, _: __m256i) -> __m256i {
531        unimplemented!()
532    }
533
534    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_si256)
535    #[hax_lib::opaque]
536    pub fn _mm256_xor_si256(_: __m256i, _: __m256i) -> __m256i {
537        unimplemented!()
538    }
539    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi16)
540    #[hax_lib::opaque]
541    pub fn _mm256_srai_epi16<const IMM8: i32>(_: __m256i) -> __m256i {
542        unimplemented!()
543    }
544    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi32)
545    #[hax_lib::opaque]
546    pub fn _mm256_srai_epi32<const IMM8: i32>(_: __m256i) -> __m256i {
547        unimplemented!()
548    }
549    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi16)
550    #[hax_lib::opaque]
551    pub fn _mm256_srli_epi16<const IMM8: i32>(_: __m256i) -> __m256i {
552        unimplemented!()
553    }
554    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi32)
555    #[hax_lib::opaque]
556    pub fn _mm256_srli_epi32<const IMM8: i32>(_: __m256i) -> __m256i {
557        unimplemented!()
558    }
559
560    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi32)
561    #[hax_lib::opaque]
562    pub fn _mm256_slli_epi32<const IMM8: i32>(_: __m256i) -> __m256i {
563        unimplemented!()
564    }
565    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_epi64)
566    #[hax_lib::opaque]
567    pub fn _mm256_permute4x64_epi64<const IMM8: i32>(_: __m256i) -> __m256i {
568        unimplemented!()
569    }
570    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi64)
571    #[hax_lib::opaque]
572    pub fn _mm256_unpackhi_epi64(_: __m256i, _: __m256i) -> __m256i {
573        unimplemented!()
574    }
575    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi32)
576    #[hax_lib::opaque]
577    pub fn _mm256_unpacklo_epi32(_: __m256i, _: __m256i) -> __m256i {
578        unimplemented!()
579    }
580    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi32)
581    #[hax_lib::opaque]
582    pub fn _mm256_unpackhi_epi32(_: __m256i, _: __m256i) -> __m256i {
583        unimplemented!()
584    }
585
586    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi32)
587    #[hax_lib::opaque]
588    pub fn _mm256_cvtepi16_epi32(_: __m128i) -> __m256i {
589        unimplemented!()
590    }
591
592    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32)
593    #[hax_lib::opaque]
594    pub fn _mm256_packs_epi32(_: __m256i, _: __m256i) -> __m256i {
595        unimplemented!()
596    }
597    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti128_si256)
598    #[hax_lib::opaque]
599    pub fn _mm256_inserti128_si256<const IMM8: i32>(_: __m256i, _: __m128i) -> __m256i {
600        unimplemented!()
601    }
602    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi16)
603    #[hax_lib::opaque]
604    pub fn _mm256_blend_epi16<const IMM8: i32>(_: __m256i, _: __m256i) -> __m256i {
605        unimplemented!()
606    }
607
608    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi64)
609    #[hax_lib::opaque]
610    pub fn _mm256_srlv_epi64(_: __m256i, _: __m256i) -> __m256i {
611        unimplemented!()
612    }
613    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi32)
614    #[hax_lib::opaque]
615    pub fn _mm_sllv_epi32(_: __m128i, _: __m128i) -> __m128i {
616        unimplemented!()
617    }
618    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi64)
619    #[hax_lib::opaque]
620    pub fn _mm256_slli_epi64<const IMM8: i32>(_: __m256i) -> __m256i {
621        unimplemented!()
622    }
623
624    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128)
625    /// NOTE: the bsrli here is different from intel specification. In the intel specification, if an IMM8 is given whose first 8 bits are higher than 15, it fixes it to 16.
626    /// However, the Rust implementation erroneously takes the input modulo 16. Thus, instead of shifting by 16 bits at an input of 16, it shifts by 0.
627    /// We are currently modelling the Rust implementation.
628    #[hax_lib::opaque]
629    pub fn _mm256_bsrli_epi128<const IMM8: i32>(_: __m256i) -> __m256i {
630        unimplemented!()
631    }
632    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_si256)
633    #[hax_lib::opaque]
634    pub fn _mm256_andnot_si256(_: __m256i, _: __m256i) -> __m256i {
635        unimplemented!()
636    }
637
638    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi64)
639    #[hax_lib::opaque]
640    pub fn _mm256_unpacklo_epi64(_: __m256i, _: __m256i) -> __m256i {
641        unimplemented!()
642    }
643    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256)
644    #[hax_lib::opaque]
645    pub fn _mm256_permute2x128_si256<const IMM8: i32>(_: __m256i, _: __m256i) -> __m256i {
646        unimplemented!()
647    }
648
649    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128)
650    #[hax_lib::exclude]
651    pub fn _mm_storeu_si128(output: *mut __m128i, a: __m128i) {
652        // This is equivalent to `*output = a`
653        let mut out = [0u8; 128];
654        extra::mm_storeu_bytes_si128(&mut out, a);
655        unsafe {
656            *(output.as_mut().unwrap()) = BitVec::from_slice(&out, 8);
657        }
658    }
659    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi16)
660    pub fn _mm256_slli_epi16<const SHIFT_BY: i32>(vector: __m256i) -> __m256i {
661        vector.chunked_shift::<16, 16>(FunArray::from_fn(|_| SHIFT_BY as i128))
662    }
663    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi64)
664    pub fn _mm256_srli_epi64<const SHIFT_BY: i32>(vector: __m256i) -> __m256i {
665        vector.chunked_shift::<64, 4>(FunArray::from_fn(|_| -(SHIFT_BY as i128)))
666    }
667    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi16)
668    #[hax_lib::opaque]
669    pub fn _mm256_mullo_epi16(_vector: __m256i, _shifts: __m256i) -> __m256i {
670        todo!()
671    }
672    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi32)
673    #[hax_lib::opaque]
674    pub fn _mm256_sllv_epi32(vector: __m256i, counts: __m256i) -> __m256i {
675        extra::mm256_sllv_epi32_u32_array(vector, counts.to_vec().try_into().unwrap())
676    }
677    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi32)
678    #[hax_lib::opaque]
679    pub fn _mm256_srlv_epi32(vector: __m256i, counts: __m256i) -> __m256i {
680        extra::mm256_srlv_epi32_u32_array(vector, counts.to_vec().try_into().unwrap())
681    }
682    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32)
683    #[hax_lib::opaque]
684    pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
685        extra::mm256_permutevar8x32_epi32_u32_array(a, b.to_vec().try_into().unwrap())
686    }
687    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti128_si256)
688    pub fn _mm256_extracti128_si256<const IMM8: i32>(vector: __m256i) -> __m128i {
689        BitVec::from_fn(|i| vector[i + if IMM8 == 0 { 0 } else { 128 }])
690    }
691    /// [Intel Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8)
692    #[hax_lib::opaque]
693    pub fn _mm256_shuffle_epi8(vector: __m256i, indexes: __m256i) -> __m256i {
694        let indexes = indexes.to_vec().try_into().unwrap();
695        extra::mm256_shuffle_epi8_i8_array(vector, indexes)
696    }
697}
698
699/// Rewrite lemmas
700const _: () = {
701    #[hax_lib::fstar::before("[@@ $REWRITE_RULE ]")]
702    #[hax_lib::lemma]
703    #[hax_lib::opaque]
704    fn _rw_mm256_sllv_epi32(
705        vector: __m256i,
706        b7: i32,
707        b6: i32,
708        b5: i32,
709        b4: i32,
710        b3: i32,
711        b2: i32,
712        b1: i32,
713        b0: i32,
714    ) -> Proof<
715        {
716            hax_lib::prop::eq(
717                _mm256_sllv_epi32(vector, _mm256_set_epi32(b7, b6, b5, b4, b3, b2, b1, b0)),
718                extra::mm256_sllv_epi32_u32(
719                    vector, b7 as u32, b6 as u32, b5 as u32, b4 as u32, b3 as u32, b2 as u32,
720                    b1 as u32, b0 as u32,
721                ),
722            )
723        },
724    > {
725    }
726
727    #[hax_lib::fstar::before("[@@ $REWRITE_RULE ]")]
728    #[hax_lib::lemma]
729    #[hax_lib::opaque]
730    fn _rw_mm256_srlv_epi32(
731        vector: __m256i,
732        b7: i32,
733        b6: i32,
734        b5: i32,
735        b4: i32,
736        b3: i32,
737        b2: i32,
738        b1: i32,
739        b0: i32,
740    ) -> Proof<
741        {
742            hax_lib::prop::eq(
743                _mm256_srlv_epi32(vector, _mm256_set_epi32(b7, b6, b5, b4, b3, b2, b1, b0)),
744                extra::mm256_srlv_epi32_u32(
745                    vector, b7 as u32, b6 as u32, b5 as u32, b4 as u32, b3 as u32, b2 as u32,
746                    b1 as u32, b0 as u32,
747                ),
748            )
749        },
750    > {
751    }
752
753    #[hax_lib::fstar::before("[@@ $REWRITE_RULE ]")]
754    #[hax_lib::lemma]
755    #[hax_lib::opaque]
756    fn _rw_mm256_permutevar8x32_epi32(
757        vector: __m256i,
758        b7: i32,
759        b6: i32,
760        b5: i32,
761        b4: i32,
762        b3: i32,
763        b2: i32,
764        b1: i32,
765        b0: i32,
766    ) -> Proof<
767        {
768            hax_lib::prop::eq(
769                _mm256_permutevar8x32_epi32(
770                    vector,
771                    _mm256_set_epi32(b7, b6, b5, b4, b3, b2, b1, b0),
772                ),
773                extra::mm256_permutevar8x32_epi32_u32(
774                    vector, b7 as u32, b6 as u32, b5 as u32, b4 as u32, b3 as u32, b2 as u32,
775                    b1 as u32, b0 as u32,
776                ),
777            )
778        },
779    > {
780    }
781
782    #[hax_lib::fstar::replace(
783        r#"
784[@@ Core_models.Abstractions.Bitvec.v_REWRITE_RULE ]
785
786assume
787val e___e_rw_mm256_mullo_epi16_shifts':
788    vector: $:{__m256i} ->
789    s15: (n: $:{u8} {v n < 16}) ->
790    s14: (n: $:{u8} {v n < 16}) ->
791    s13: (n: $:{u8} {v n < 16}) ->
792    s12: (n: $:{u8} {v n < 16}) ->
793    s11: (n: $:{u8} {v n < 16}) ->
794    s10: (n: $:{u8} {v n < 16}) ->
795    s9: (n: $:{u8} {v n < 16}) ->
796    s8: (n: $:{u8} {v n < 16}) ->
797    s7: (n: $:{u8} {v n < 16}) ->
798    s6: (n: $:{u8} {v n < 16}) ->
799    s5: (n: $:{u8} {v n < 16}) ->
800    s4: (n: $:{u8} {v n < 16}) ->
801    s3: (n: $:{u8} {v n < 16}) ->
802    s2: (n: $:{u8} {v n < 16}) ->
803    s1: (n: $:{u8} {v n < 16}) ->
804    s0: (n: $:{u8} {v n < 16})
805  -> Lemma
806    (ensures
807      ($_mm256_mullo_epi16 vector
808          ($_mm256_set_epi16 (${1i16} <<! s15 <: i16)
809              (${1i16} <<! s14 <: i16) (${1i16} <<! s13 <: i16) (${1i16} <<! s12 <: i16)
810              (${1i16} <<! s11 <: i16) (${1i16} <<! s10 <: i16) (${1i16} <<! s9 <: i16)
811              (${1i16} <<! s8 <: i16) (${1i16} <<! s7 <: i16) (${1i16} <<! s6 <: i16)
812              (${1i16} <<! s5 <: i16) (${1i16} <<! s4 <: i16) (${1i16} <<! s3 <: i16)
813              (${1i16} <<! s2 <: i16) (${1i16} <<! s1 <: i16) (${1i16} <<! s0 <: i16)
814            )
815        ) ==
816      (${extra::mm256_mullo_epi16_shifts} vector s15 s14 s13 s12 s11 s10 s9 s8 s7
817          s6 s5 s4 s3 s2 s1 s0))
818
819let ${_rw_mm256_mullo_epi16_shifts} = e___e_rw_mm256_mullo_epi16_shifts'
820    "#
821    )]
822    // Note: this is replace with F* code because we need to refine the input of the lemma.
823    pub fn _rw_mm256_mullo_epi16_shifts() {}
824
825    #[hax_lib::fstar::before("[@@ $REWRITE_RULE ]")]
826    #[hax_lib::lemma]
827    #[hax_lib::opaque]
828    #[allow(unused_variables)]
829    fn _rw_mm_shuffle_epi8(
830        vector: __m128i,
831        e15: i8,
832        e14: i8,
833        e13: i8,
834        e12: i8,
835        e11: i8,
836        e10: i8,
837        e9: i8,
838        e8: i8,
839        e7: i8,
840        e6: i8,
841        e5: i8,
842        e4: i8,
843        e3: i8,
844        e2: i8,
845        e1: i8,
846        e0: i8,
847    ) -> Proof<
848        {
849            hax_lib::prop::eq(
850                _mm_shuffle_epi8(
851                    vector,
852                    _mm_set_epi8(
853                        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
854                    ),
855                ),
856                extra::mm_shuffle_epi8_u8(
857                    vector, e15 as u8, e14 as u8, e13 as u8, e12 as u8, e11 as u8, e10 as u8,
858                    e9 as u8, e8 as u8, e7 as u8, e6 as u8, e5 as u8, e4 as u8, e3 as u8, e2 as u8,
859                    e1 as u8, e0 as u8,
860                ),
861            )
862        },
863    > {
864    }
865
866    #[hax_lib::fstar::before("[@@ $REWRITE_RULE ]")]
867    #[hax_lib::lemma]
868    #[hax_lib::opaque]
869    #[allow(unused_variables)]
870    fn _rw_mm256_shuffle_epi8(
871        vector: __m256i,
872        byte31: i8,
873        byte30: i8,
874        byte29: i8,
875        byte28: i8,
876        byte27: i8,
877        byte26: i8,
878        byte25: i8,
879        byte24: i8,
880        byte23: i8,
881        byte22: i8,
882        byte21: i8,
883        byte20: i8,
884        byte19: i8,
885        byte18: i8,
886        byte17: i8,
887        byte16: i8,
888        byte15: i8,
889        byte14: i8,
890        byte13: i8,
891        byte12: i8,
892        byte11: i8,
893        byte10: i8,
894        byte9: i8,
895        byte8: i8,
896        byte7: i8,
897        byte6: i8,
898        byte5: i8,
899        byte4: i8,
900        byte3: i8,
901        byte2: i8,
902        byte1: i8,
903        byte0: i8,
904    ) -> Proof<
905        {
906            hax_lib::prop::eq(
907                _mm256_shuffle_epi8(
908                    vector,
909                    _mm256_set_epi8(
910                        byte31, byte30, byte29, byte28, byte27, byte26, byte25, byte24, byte23,
911                        byte22, byte21, byte20, byte19, byte18, byte17, byte16, byte15, byte14,
912                        byte13, byte12, byte11, byte10, byte9, byte8, byte7, byte6, byte5, byte4,
913                        byte3, byte2, byte1, byte0,
914                    ),
915                ),
916                extra::mm256_shuffle_epi8_i8(
917                    vector, byte31, byte30, byte29, byte28, byte27, byte26, byte25, byte24, byte23,
918                    byte22, byte21, byte20, byte19, byte18, byte17, byte16, byte15, byte14, byte13,
919                    byte12, byte11, byte10, byte9, byte8, byte7, byte6, byte5, byte4, byte3, byte2,
920                    byte1, byte0,
921                ),
922            )
923        },
924    > {
925    }
926};
927
928pub mod extra {
929    use super::*;
930
931    pub fn mm256_sllv_epi32_u32_array(
932        vector: BitVec<256>,
933        counts: FunArray<8, u32>,
934    ) -> BitVec<256> {
935        vector.chunked_shift::<32, 8>(FunArray::from_fn(|i| counts[i] as i128))
936    }
937
938    pub fn mm256_sllv_epi32_u32(
939        vector: BitVec<256>,
940        b7: u32,
941        b6: u32,
942        b5: u32,
943        b4: u32,
944        b3: u32,
945        b2: u32,
946        b1: u32,
947        b0: u32,
948    ) -> BitVec<256> {
949        mm256_sllv_epi32_u32_array(
950            vector,
951            FunArray::from_fn(|i| match i {
952                7 => b7,
953                6 => b6,
954                5 => b5,
955                4 => b4,
956                3 => b3,
957                2 => b2,
958                1 => b1,
959                0 => b0,
960                _ => unreachable!(),
961            }),
962        )
963    }
964
965    pub fn mm256_srlv_epi32_u32_array(
966        vector: BitVec<256>,
967        counts: FunArray<8, u32>,
968    ) -> BitVec<256> {
969        vector.chunked_shift::<32, 8>(FunArray::from_fn(|i| -(counts[i] as i128)))
970    }
971
972    pub fn mm256_srlv_epi32_u32(
973        vector: BitVec<256>,
974        b7: u32,
975        b6: u32,
976        b5: u32,
977        b4: u32,
978        b3: u32,
979        b2: u32,
980        b1: u32,
981        b0: u32,
982    ) -> BitVec<256> {
983        mm256_srlv_epi32_u32_array(
984            vector,
985            FunArray::from_fn(|i| match i {
986                7 => b7,
987                6 => b6,
988                5 => b5,
989                4 => b4,
990                3 => b3,
991                2 => b2,
992                1 => b1,
993                0 => b0,
994                _ => unreachable!(),
995            }),
996        )
997    }
998
999    pub fn mm256_permutevar8x32_epi32_u32_array(
1000        a: BitVec<256>,
1001        b: FunArray<8, u32>,
1002    ) -> BitVec<256> {
1003        BitVec::from_fn(|i| {
1004            let j = i / 32;
1005            let index = ((b[j] % 8) as u64) * 32;
1006            a[index + i % 32]
1007        })
1008    }
1009
1010    pub fn mm256_permutevar8x32_epi32_u32(
1011        vector: BitVec<256>,
1012        b7: u32,
1013        b6: u32,
1014        b5: u32,
1015        b4: u32,
1016        b3: u32,
1017        b2: u32,
1018        b1: u32,
1019        b0: u32,
1020    ) -> BitVec<256> {
1021        mm256_permutevar8x32_epi32_u32_array(
1022            vector,
1023            FunArray::from_fn(|i| match i {
1024                7 => b7,
1025                6 => b6,
1026                5 => b5,
1027                4 => b4,
1028                3 => b3,
1029                2 => b2,
1030                1 => b1,
1031                0 => b0,
1032                _ => unreachable!(),
1033            }),
1034        )
1035    }
1036
1037    pub fn mm_shuffle_epi8_u8_array(vector: BitVec<128>, indexes: FunArray<16, u8>) -> BitVec<128> {
1038        BitVec::from_fn(|i| {
1039            let nth = i / 8;
1040            let index = indexes[nth];
1041            if index > 127 {
1042                Bit::Zero
1043            } else {
1044                let index = (index % 16) as u64;
1045                vector[index * 8 + i % 8]
1046            }
1047        })
1048    }
1049
1050    pub fn mm_shuffle_epi8_u8(
1051        vector: BitVec<128>,
1052        b15: u8,
1053        b14: u8,
1054        b13: u8,
1055        b12: u8,
1056        b11: u8,
1057        b10: u8,
1058        b9: u8,
1059        b8: u8,
1060        b7: u8,
1061        b6: u8,
1062        b5: u8,
1063        b4: u8,
1064        b3: u8,
1065        b2: u8,
1066        b1: u8,
1067        b0: u8,
1068    ) -> BitVec<128> {
1069        let indexes = FunArray::from_fn(|i| match i {
1070            15 => b15,
1071            14 => b14,
1072            13 => b13,
1073            12 => b12,
1074            11 => b11,
1075            10 => b10,
1076            9 => b9,
1077            8 => b8,
1078            7 => b7,
1079            6 => b6,
1080            5 => b5,
1081            4 => b4,
1082            3 => b3,
1083            2 => b2,
1084            1 => b1,
1085            0 => b0,
1086            _ => unreachable!(),
1087        });
1088        mm_shuffle_epi8_u8_array(vector, indexes)
1089    }
1090
1091    pub fn mm256_shuffle_epi8_i8_array(
1092        vector: BitVec<256>,
1093        indexes: FunArray<32, i8>,
1094    ) -> BitVec<256> {
1095        BitVec::from_fn(|i| {
1096            let nth = i / 8;
1097            let index = indexes[nth];
1098            if index < 0 {
1099                Bit::Zero
1100            } else {
1101                let index = (index % 16) as u64;
1102                vector[if i < 128 { 0 } else { 128 } + index * 8 + i % 8]
1103            }
1104        })
1105    }
1106
1107    pub fn mm256_shuffle_epi8_i8(
1108        vector: BitVec<256>,
1109        byte31: i8,
1110        byte30: i8,
1111        byte29: i8,
1112        byte28: i8,
1113        byte27: i8,
1114        byte26: i8,
1115        byte25: i8,
1116        byte24: i8,
1117        byte23: i8,
1118        byte22: i8,
1119        byte21: i8,
1120        byte20: i8,
1121        byte19: i8,
1122        byte18: i8,
1123        byte17: i8,
1124        byte16: i8,
1125        byte15: i8,
1126        byte14: i8,
1127        byte13: i8,
1128        byte12: i8,
1129        byte11: i8,
1130        byte10: i8,
1131        byte9: i8,
1132        byte8: i8,
1133        byte7: i8,
1134        byte6: i8,
1135        byte5: i8,
1136        byte4: i8,
1137        byte3: i8,
1138        byte2: i8,
1139        byte1: i8,
1140        byte0: i8,
1141    ) -> BitVec<256> {
1142        let indexes = FunArray::from_fn(|i| match i {
1143            31 => byte31,
1144            30 => byte30,
1145            29 => byte29,
1146            28 => byte28,
1147            27 => byte27,
1148            26 => byte26,
1149            25 => byte25,
1150            24 => byte24,
1151            23 => byte23,
1152            22 => byte22,
1153            21 => byte21,
1154            20 => byte20,
1155            19 => byte19,
1156            18 => byte18,
1157            17 => byte17,
1158            16 => byte16,
1159            15 => byte15,
1160            14 => byte14,
1161            13 => byte13,
1162            12 => byte12,
1163            11 => byte11,
1164            10 => byte10,
1165            9 => byte9,
1166            8 => byte8,
1167            7 => byte7,
1168            6 => byte6,
1169            5 => byte5,
1170            4 => byte4,
1171            3 => byte3,
1172            2 => byte2,
1173            1 => byte1,
1174            0 => byte0,
1175            _ => unreachable!(),
1176        });
1177        mm256_shuffle_epi8_i8_array(vector, indexes)
1178    }
1179
1180    pub fn mm256_mullo_epi16_shifts(
1181        vector: __m256i,
1182        s15: u8,
1183        s14: u8,
1184        s13: u8,
1185        s12: u8,
1186        s11: u8,
1187        s10: u8,
1188        s9: u8,
1189        s8: u8,
1190        s7: u8,
1191        s6: u8,
1192        s5: u8,
1193        s4: u8,
1194        s3: u8,
1195        s2: u8,
1196        s1: u8,
1197        s0: u8,
1198    ) -> __m256i {
1199        let shifts = FunArray::<16, _>::from_fn(|i| match i {
1200            15 => s15,
1201            14 => s14,
1202            13 => s13,
1203            12 => s12,
1204            11 => s11,
1205            10 => s10,
1206            9 => s9,
1207            8 => s8,
1208            7 => s7,
1209            6 => s6,
1210            5 => s5,
1211            4 => s4,
1212            3 => s3,
1213            2 => s2,
1214            1 => s1,
1215            0 => s0,
1216            _ => unreachable!(),
1217        });
1218        mm256_mullo_epi16_shifts_array(vector, shifts)
1219    }
1220    pub fn mm256_mullo_epi16_shifts_array(vector: __m256i, shifts: FunArray<16, u8>) -> __m256i {
1221        BitVec::from_fn(|i| {
1222            let nth_bit = i % 16;
1223            let nth_i16 = i / 16;
1224
1225            let shift = shifts[nth_i16] as u64;
1226
1227            if nth_bit >= shift {
1228                vector[nth_i16 * 16 + nth_bit - shift]
1229            } else {
1230                Bit::Zero
1231            }
1232        })
1233    }
1234
1235    #[hax_lib::exclude]
1236    pub fn mm_storeu_bytes_si128(output: &mut [u8], vector: BitVec<128>) {
1237        output.copy_from_slice(&vector.to_vec()[..]);
1238    }
1239}
1240
1241/// Tests of equivalence between `safe::*` and `upstream::*`.
1242#[cfg(all(test, any(target_arch = "x86", target_arch = "x86_64")))]
1243mod tests {
1244    use super::*;
1245
1246    /// Number of tests to run for each function
1247    const N: usize = 1000;
1248
1249    #[test]
1250    fn mm256_slli_epi16() {
1251        macro_rules! mk {
1252                ($($shift: literal)*) => {
1253                    $(for _ in 0..N {
1254                        let input = BitVec::<256>::rand();
1255                        assert_eq!(
1256                            super::_mm256_slli_epi16::<$shift>(input),
1257                            unsafe {upstream::_mm256_slli_epi16::<$shift>(input.into()).into()}
1258                        );
1259                    })*
1260                };
1261            }
1262        mk!(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15);
1263    }
1264
1265    #[test]
1266    fn mm256_srli_epi64() {
1267        macro_rules! mk {
1268                ($($shift: literal)*) => {
1269                    $(for _ in 0..N {
1270                        let input = BitVec::<256>::rand();
1271                        assert_eq!(
1272                            super::_mm256_srli_epi64::<$shift>(input),
1273                            unsafe{upstream::_mm256_srli_epi64::<$shift>(input.into()).into()}
1274                        );
1275                    })*
1276                };
1277            }
1278        mk!(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63);
1279    }
1280
1281    #[test]
1282    fn mm256_sllv_epi32() {
1283        for _ in 0..N {
1284            let vector: BitVec<256> = BitVec::rand();
1285            let counts: BitVec<256> = BitVec::rand();
1286            assert_eq!(super::_mm256_sllv_epi32(vector, counts), unsafe {
1287                upstream::_mm256_sllv_epi32(vector.into(), counts.into()).into()
1288            });
1289        }
1290    }
1291
1292    #[test]
1293    fn mm256_srlv_epi32() {
1294        for _ in 0..N {
1295            let vector: BitVec<256> = BitVec::rand();
1296            let counts: BitVec<256> = BitVec::rand();
1297            assert_eq!(super::_mm256_srlv_epi32(vector, counts), unsafe {
1298                upstream::_mm256_srlv_epi32(vector.into(), counts.into()).into()
1299            });
1300        }
1301    }
1302
1303    #[test]
1304    fn mm256_permutevar8x32_epi32() {
1305        for _ in 0..N {
1306            let vector: BitVec<256> = BitVec::rand();
1307            let counts: BitVec<256> = BitVec::rand();
1308            assert_eq!(super::_mm256_permutevar8x32_epi32(vector, counts), unsafe {
1309                upstream::_mm256_permutevar8x32_epi32(vector.into(), counts.into()).into()
1310            });
1311        }
1312    }
1313
1314    #[test]
1315    fn mm256_castsi256_si128() {
1316        for _ in 0..N {
1317            let vector: BitVec<256> = BitVec::rand();
1318            assert_eq!(super::_mm256_castsi256_si128(vector), unsafe {
1319                upstream::_mm256_castsi256_si128(vector.into()).into()
1320            });
1321        }
1322    }
1323
1324    #[test]
1325    fn mm256_extracti128_si256() {
1326        for _ in 0..N {
1327            let vector: BitVec<256> = BitVec::rand();
1328            assert_eq!(super::_mm256_extracti128_si256::<0>(vector), unsafe {
1329                upstream::_mm256_extracti128_si256::<0>(vector.into()).into()
1330            });
1331            assert_eq!(super::_mm256_extracti128_si256::<1>(vector), unsafe {
1332                upstream::_mm256_extracti128_si256::<1>(vector.into()).into()
1333            });
1334        }
1335    }
1336
1337    #[test]
1338    fn mm_shuffle_epi8() {
1339        for _ in 0..N {
1340            let a: BitVec<128> = BitVec::rand();
1341            let b: BitVec<128> = BitVec::rand();
1342
1343            assert_eq!(super::_mm_shuffle_epi8(a, b), unsafe {
1344                upstream::_mm_shuffle_epi8(a.into(), b.into()).into()
1345            });
1346        }
1347    }
1348
1349    #[test]
1350    fn mm256_shuffle_epi8() {
1351        for _ in 0..N {
1352            let a: BitVec<256> = BitVec::rand();
1353            let b: BitVec<256> = BitVec::rand();
1354
1355            assert_eq!(super::_mm256_shuffle_epi8(a, b), unsafe {
1356                upstream::_mm256_shuffle_epi8(a.into(), b.into()).into()
1357            });
1358        }
1359    }
1360
1361    #[test]
1362    fn mm256_mullo_epi16_shifts() {
1363        pub fn upsteam_mm256_mullo_epi16_shifts(
1364            vector: upstream::__m256i,
1365            s15: u8,
1366            s14: u8,
1367            s13: u8,
1368            s12: u8,
1369            s11: u8,
1370            s10: u8,
1371            s9: u8,
1372            s8: u8,
1373            s7: u8,
1374            s6: u8,
1375            s5: u8,
1376            s4: u8,
1377            s3: u8,
1378            s2: u8,
1379            s1: u8,
1380            s0: u8,
1381        ) -> upstream::__m256i {
1382            unsafe {
1383                upstream::_mm256_mullo_epi16(
1384                    vector,
1385                    upstream::_mm256_set_epi16(
1386                        1i16 << s15,
1387                        1i16 << s14,
1388                        1i16 << s13,
1389                        1i16 << s12,
1390                        1i16 << s11,
1391                        1i16 << s10,
1392                        1i16 << s9,
1393                        1i16 << s8,
1394                        1i16 << s7,
1395                        1i16 << s6,
1396                        1i16 << s5,
1397                        1i16 << s4,
1398                        1i16 << s3,
1399                        1i16 << s2,
1400                        1i16 << s1,
1401                        1i16 << s0,
1402                    ),
1403                )
1404            }
1405        }
1406        for _ in 0..N {
1407            let a: BitVec<256> = BitVec::rand();
1408            let s15: u8 = rand::random::<u8>() % 16;
1409            let s14: u8 = rand::random::<u8>() % 16;
1410            let s13: u8 = rand::random::<u8>() % 16;
1411            let s12: u8 = rand::random::<u8>() % 16;
1412            let s11: u8 = rand::random::<u8>() % 16;
1413            let s10: u8 = rand::random::<u8>() % 16;
1414            let s9: u8 = rand::random::<u8>() % 16;
1415            let s8: u8 = rand::random::<u8>() % 16;
1416            let s7: u8 = rand::random::<u8>() % 16;
1417            let s6: u8 = rand::random::<u8>() % 16;
1418            let s5: u8 = rand::random::<u8>() % 16;
1419            let s4: u8 = rand::random::<u8>() % 16;
1420            let s3: u8 = rand::random::<u8>() % 16;
1421            let s2: u8 = rand::random::<u8>() % 16;
1422            let s1: u8 = rand::random::<u8>() % 16;
1423            let s0: u8 = rand::random::<u8>() % 16;
1424
1425            assert_eq!(
1426                super::extra::mm256_mullo_epi16_shifts(
1427                    a, s15, s14, s13, s12, s11, s10, s9, s8, s7, s6, s5, s4, s3, s2, s1, s0,
1428                ),
1429                upsteam_mm256_mullo_epi16_shifts(
1430                    a.into(),
1431                    s15,
1432                    s14,
1433                    s13,
1434                    s12,
1435                    s11,
1436                    s10,
1437                    s9,
1438                    s8,
1439                    s7,
1440                    s6,
1441                    s5,
1442                    s4,
1443                    s3,
1444                    s2,
1445                    s1,
1446                    s0,
1447                )
1448                .into()
1449            );
1450        }
1451    }
1452}