// rustalign_simd/lib.rs

//! RustAlign SIMD - SIMD-accelerated alignment operations
//!
//! This crate provides SIMD-accelerated implementations of the
//! Smith-Waterman alignment algorithm, matching the C++ SSE/AVX2 code.
//!
//! ## Platform Support
//!
//! - **x86_64**: SSE2, SSE4.2, AVX2 via `core::arch::x86_64`
//! - **AArch64**: NEON via `core::arch::aarch64`
//! - **Other**: Scalar fallback implementations
//!
//! ## Runtime Dispatch
//!
//! The crate supports runtime CPU feature detection and dispatch,
//! similar to the C++ `__builtin_cpu_supports` approach.

17#![warn(missing_docs)]
18#![warn(clippy::all)]
19
20mod cpu;
21mod dp_matrix;
22mod sse_wrapper;
23
24pub use cpu::{CpuFeatures, has_avx2, has_neon, has_popcnt, has_sse2, has_sse4_2};
25pub use dp_matrix::{DpMatrix, MatrixQuartet};
26pub use sse_wrapper::*;
27
/// Vector type used for buffers handed to the SSE/AVX kernels.
///
/// This stands in for the C++ `EList_sse`, which provides 16/32-byte aligned
/// storage. NOTE: the alias is currently a plain [`Vec`], so the allocation is
/// only guaranteed to be aligned for `T` itself; it does not by itself
/// guarantee 16- or 32-byte alignment. Code that requires aligned loads or
/// stores must verify or enforce the alignment separately.
pub type AlignedVec<T> = Vec<T>;

/// Creates an [`AlignedVec`] holding `capacity` default-initialised elements.
///
/// Despite the parameter name, the returned vector is not merely reserved
/// space: its length equals `capacity` and every slot contains
/// `T::default()`, so it can be indexed immediately.
pub fn aligned_vec<T>(capacity: usize) -> AlignedVec<T>
where
    T: Default + Clone,
{
    let mut v = Vec::with_capacity(capacity);
    // Populate every reserved slot; `T::default()` is invoked once per element.
    v.resize_with(capacity, T::default);
    v
}

/// SSE register width in bytes (128-bit registers)
pub const SSE_REG_WIDTH: usize = 16;
/// AVX register width in bytes (256-bit registers)
pub const AVX_REG_WIDTH: usize = 32;

// The lane counts below are derived from the register widths so the two sets
// of constants cannot drift apart.

/// Number of i16 elements per SSE register
pub const I16_PER_SSE: usize = SSE_REG_WIDTH / std::mem::size_of::<i16>();
/// Number of u8 elements per SSE register
pub const U8_PER_SSE: usize = SSE_REG_WIDTH / std::mem::size_of::<u8>();
/// Number of i16 elements per AVX register
pub const I16_PER_AVX: usize = AVX_REG_WIDTH / std::mem::size_of::<i16>();
/// Number of u8 elements per AVX register
pub const U8_PER_AVX: usize = AVX_REG_WIDTH / std::mem::size_of::<u8>();

#[cfg(test)]
mod tests {
    use super::*;

    /// Feature detection must complete without panicking on the host CPU.
    #[test]
    fn test_cpu_detection() {
        let _features = CpuFeatures::detect();
    }

    /// `aligned_vec` returns a fully populated, default-initialised buffer.
    #[test]
    fn test_aligned_vec() {
        let vec: AlignedVec<i16> = aligned_vec(10);
        // `Vec::with_capacity` only promises *at least* the requested
        // capacity, so an exact-equality check would be over-specified.
        assert!(vec.capacity() >= 10);
        assert_eq!(vec.len(), 10);
        assert!(vec.iter().all(|&x| x == 0));
    }

    /// Register widths and per-register lane counts for both SSE and AVX.
    #[test]
    fn test_constants() {
        assert_eq!(SSE_REG_WIDTH, 16);
        assert_eq!(AVX_REG_WIDTH, 32);
        assert_eq!(I16_PER_SSE, 8);
        assert_eq!(U8_PER_SSE, 16);
        assert_eq!(I16_PER_AVX, 16);
        assert_eq!(U8_PER_AVX, 32);
    }
}