lance_linalg/simd.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Poor-man's SIMD
5//!
//! The difference between this implementation and [std::simd] is that
//! this implementation holds the SIMD register directly, thus it exposes more
//! optimization opportunities to use the wide range of instructions available.
9//!
10//! Also, it gives us more control, for example, it is likely that we will have
11//! f16/bf16 support before the standard library does.
12//!
//! The API is close to [std::simd] to make migration easier in the future.
14
15use std::ops::{Add, AddAssign, Mul, Sub, SubAssign};
16
17pub mod f32;
18pub mod i32;
19pub mod u8;
20
21use num_traits::{Float, Num};
22use u8::u8x16;
23
24/// Lance SIMD lib
25///
26pub trait SIMD<T: Num + Copy, const N: usize>:
27 std::fmt::Debug
28 + AddAssign<Self>
29 + Add<Self, Output = Self>
30 + Mul<Self, Output = Self>
31 + Sub<Self, Output = Self>
32 + SubAssign<Self>
33 + Copy
34 + Clone
35 + Sized
36 + for<'a> From<&'a [T]>
37{
38 const LANES: usize = N;
39
40 /// Create a new instance with all lanes set to `val`.
41 fn splat(val: T) -> Self;
42
43 /// Create a new instance with all lanes set to zero.
44 fn zeros() -> Self;
45
46 /// Load aligned data from aligned memory.
47 ///
48 /// # Safety
49 ///
50 /// It crashes if the ptr is not aligned.
51 unsafe fn load(ptr: *const T) -> Self;
52
53 /// Load unaligned data from memory.
54 ///
55 /// # Safety
56 unsafe fn load_unaligned(ptr: *const T) -> Self;
57
58 /// Store the values to aligned memory.
59 ///
60 /// # Safety
61 ///
62 /// It crashes if the ptr is not aligned
63 unsafe fn store(&self, ptr: *mut T);
64
65 /// Store the values to unaligned memory.
66 ///
67 /// # Safety
68 unsafe fn store_unaligned(&self, ptr: *mut T);
69
70 /// Return the values as an array.
71 fn as_array(&self) -> [T; N] {
72 let mut arr = [T::zero(); N];
73 unsafe {
74 self.store_unaligned(arr.as_mut_ptr());
75 }
76 arr
77 }
78
79 /// Calculate the sum across this vector.
80 fn reduce_sum(&self) -> T;
81
82 /// Find the minimal value in the vector.
83 fn reduce_min(&self) -> T;
84
85 /// Return the minimal value of these two vectors.
86 fn min(&self, rhs: &Self) -> Self;
87
88 /// Find the index of value in the vector. If not found, return None.
89 fn find(&self, val: T) -> Option<i32>;
90}
91
92pub trait FloatSimd<F: Float, const N: usize>: SIMD<F, N> {
93 /// fused multiply-add
94 ///
95 /// c = a * b + c
96 fn multiply_add(&mut self, a: Self, b: Self);
97}
98
99pub trait Shuffle {
100 fn shuffle(&self, indices: u8x16) -> Self;
101}