1#![cfg_attr(feature = "nightly", feature(core_io_borrowed_buf))]
34#![cfg_attr(docsrs, feature(doc_cfg))]
35
36#[cfg(feature = "nightly")]
37use std::io::BorrowedCursor;
38use std::{mem, str::FromStr};
39
40use cfg_if::cfg_if;
41use ctor::ctor;
42
43#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
44mod avx2;
45#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
46mod avx512f;
47mod generic;
48#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
49mod sse2;
50
/// Signature shared by every low-level shuffle and unshuffle kernel.
///
/// Every call site in this file passes, in order: the element size in bytes,
/// the total number of bytes to process, a pointer to the source bytes, and a
/// pointer to a destination with room for that same number of bytes.
type LlFunc = unsafe fn(usize, usize, *const u8, *mut u8);
/// The `(shuffle, unshuffle)` kernel pair that the public functions dispatch to.
///
/// Starts out as the kernels from the `generic` module and is overwritten by
/// `select_implementation`.  This is a plain `static mut` with no
/// synchronization of its own, so a write must never race with a read.
static mut IMPL: (LlFunc, LlFunc) = (generic::shuffle, generic::unshuffle);
53
/// Names the SIMD implementation that [`select_implementation`] should install.
///
/// A value can be parsed from a string with [`FromStr`].  The accepted names
/// are `auto`, `generic`, `sse2`, `avx2` and `avx512f`, compared without regard
/// to ASCII case.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum SimdImpl {
    /// Choose an implementation from the CPU features detected at run time.
    Auto,
    /// The implementation from the `generic` module, with no CPU feature
    /// requirements.
    Generic,
    /// The SSE2 implementation.  See [`select_implementation`] for the exact
    /// routines it installs.
    Sse2,
    /// The AVX2 implementation.  See [`select_implementation`] for the exact
    /// routines it installs.
    Avx2,
    /// The AVX-512F implementation.  See [`select_implementation`] for the
    /// exact routines it installs.
    Avx512F,
}

/// The error returned when a string does not name any [`SimdImpl`] variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParseSimdImplErr;

impl std::fmt::Display for ParseSimdImplErr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("unrecognized SIMD implementation name")
    }
}

// Implementing `Error` lets callers box this error or propagate it with `?`.
impl std::error::Error for ParseSimdImplErr {}

impl FromStr for SimdImpl {
    type Err = ParseSimdImplErr;

    /// Parse an implementation name, ignoring ASCII case.
    ///
    /// # Errors
    ///
    /// Returns [`ParseSimdImplErr`] if `s` is not one of `auto`, `generic`,
    /// `sse2`, `avx2` or `avx512f`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        const NAMES: [(&str, SimdImpl); 5] = [
            ("auto", SimdImpl::Auto),
            ("generic", SimdImpl::Generic),
            ("sse2", SimdImpl::Sse2),
            ("avx2", SimdImpl::Avx2),
            ("avx512f", SimdImpl::Avx512F),
        ];

        // Every accepted name is pure ASCII, so an ASCII case-insensitive
        // comparison is sufficient and avoids allocating a lowercased copy.
        for &(name, value) in NAMES.iter() {
            if s.eq_ignore_ascii_case(name) {
                return Ok(value);
            }
        }
        Err(ParseSimdImplErr)
    }
}
87
88#[ctor]
89fn select_implementation_ctor() {
90 unsafe { select_implementation(SimdImpl::Auto) }
92}
93
94pub unsafe fn select_implementation(impl_: SimdImpl) {
104 cfg_if! {
106 if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
107 match impl_ {
108 SimdImpl::Auto => {
109 if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw"){
110 unsafe { IMPL = (avx512f::shuffle, avx2::unshuffle); }
111 } else if is_x86_feature_detected!("avx2") {
112 unsafe { IMPL = (avx2::shuffle, avx2::unshuffle); }
113 } else if is_x86_feature_detected!("sse2") {
114 unsafe { IMPL = (sse2::shuffle, sse2::unshuffle); }
115 } else {
116 unsafe { IMPL = (generic::shuffle, generic::unshuffle); }
117 }
118 },
119 SimdImpl::Generic => unsafe { IMPL = (generic::shuffle, generic::unshuffle); },
120 SimdImpl::Sse2 => unsafe { IMPL = (sse2::shuffle, sse2::unshuffle); },
121 SimdImpl::Avx2 => unsafe { IMPL = (avx2::shuffle, sse2::unshuffle); },
122 SimdImpl::Avx512F => unsafe { IMPL = (avx512f::shuffle, sse2::unshuffle); },
123 }
124 } else {
125 let _ = impl_;
126 unsafe { IMPL = (generic::shuffle, generic::unshuffle); }
127 }
128 }
129}
130
131#[cfg_attr(
144 target_endian = "little",
145 doc = "assert_eq!(out, [0x01, 0x02, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00]);"
146)]
147#[cfg_attr(
148 target_endian = "big",
149 doc = "assert_eq!(out, [0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04]);"
150)]
151pub fn shuffle_objects<T: Copy>(src: &[T]) -> Vec<u8> {
153 let ts = mem::size_of::<T>();
154 assert!(ts > 1, "No point shuffling plain [u8]");
155 let mut dst = Vec::with_capacity(mem::size_of_val(src));
156 unsafe {
158 IMPL.0(
159 ts,
160 mem::size_of_val(src),
161 src.as_ptr() as *const u8,
162 dst.as_mut_ptr(),
163 );
164 dst.set_len(mem::size_of_val(src));
165 };
166 dst
167}
168
169pub fn shuffle(typesize: usize, src: &[u8]) -> Vec<u8> {
180 let mut dst = Vec::with_capacity(src.len());
181 unsafe {
183 IMPL.0(typesize, src.len(), src.as_ptr(), dst.as_mut_ptr());
184 dst.set_len(src.len());
185 }
186 assert_eq!(src.len(), dst.len());
187 dst
188}
189
190pub fn shuffle_into(typesize: usize, src: &[u8], dst: &mut [u8]) {
201 assert_eq!(src.len(), dst.len());
202 unsafe {
204 IMPL.0(typesize, src.len(), src.as_ptr(), dst.as_mut_ptr());
205 }
206}
207
#[cfg(feature = "nightly")]
/// Shuffle `src`, treated as a sequence of `typesize`-byte elements, into the
/// unfilled part of `buf`, then advance the cursor by `src.len()` bytes.
///
/// # Panics
///
/// Panics if the cursor has less than `src.len()` bytes of capacity.
#[cfg_attr(docsrs, doc(cfg(feature = "nightly")))]
pub fn shuffle_buf(typesize: usize, src: &[u8], mut buf: BorrowedCursor<'_>) {
    let nbytes = src.len();
    assert!(buf.capacity() >= nbytes);
    // SAFETY: the capacity check above guarantees the cursor has room for
    // `nbytes` bytes.  The cursor is advanced only after the kernel has run,
    // relying on the kernel to have written every one of those bytes.
    // NOTE(review): the exact contract of `BorrowedCursor::advance` has changed
    // between nightly releases -- confirm against the pinned toolchain.
    unsafe {
        let dst = buf.as_mut().as_mut_ptr().cast::<u8>();
        IMPL.0(typesize, nbytes, src.as_ptr(), dst);
        buf.advance(nbytes);
    }
}
233
234pub unsafe fn unshuffle_objects<T: Copy>(src: &[u8]) -> Vec<T> {
252 let ts = mem::size_of::<T>();
253 assert!(ts > 1, "No point shuffling plain [u8]");
254 let mut dst = Vec::with_capacity(src.len() / ts);
255 unsafe {
257 IMPL.1(
258 ts,
259 mem::size_of_val(src),
260 src.as_ptr(),
261 dst.as_mut_ptr() as *mut u8,
262 );
263 dst.set_len(src.len() / ts);
264 }
265 assert_eq!(mem::size_of_val(src), mem::size_of_val(&dst[..]));
266 dst
267}
268
269pub fn unshuffle(typesize: usize, src: &[u8]) -> Vec<u8> {
280 let mut dst = Vec::with_capacity(src.len());
281 unsafe {
282 IMPL.1(typesize, src.len(), src.as_ptr(), dst.as_mut_ptr());
283 dst.set_len(src.len());
284 }
285 assert_eq!(src.len(), dst.len());
286 dst
287}
288
289pub fn unshuffle_into(typesize: usize, src: &[u8], dst: &mut [u8]) {
301 assert_eq!(src.len(), dst.len());
302 unsafe {
303 IMPL.1(typesize, src.len(), src.as_ptr(), dst.as_mut_ptr());
304 }
305}
306
#[cfg(feature = "nightly")]
/// Unshuffle `src`, treated as shuffled `typesize`-byte elements, into the
/// unfilled part of `buf`, then advance the cursor by `src.len()` bytes.
///
/// # Panics
///
/// Panics if the cursor has less than `src.len()` bytes of capacity.
#[cfg_attr(docsrs, doc(cfg(feature = "nightly")))]
pub fn unshuffle_buf(typesize: usize, src: &[u8], mut buf: BorrowedCursor<'_>) {
    let nbytes = src.len();
    assert!(buf.capacity() >= nbytes);
    // SAFETY: the capacity check above guarantees the cursor has room for
    // `nbytes` bytes.  The cursor is advanced only after the kernel has run,
    // relying on the kernel to have written every one of those bytes.
    // NOTE(review): the exact contract of `BorrowedCursor::advance` has changed
    // between nightly releases -- confirm against the pinned toolchain.
    unsafe {
        let dst = buf.as_mut().as_mut_ptr().cast::<u8>();
        IMPL.1(typesize, nbytes, src.as_ptr(), dst);
        buf.advance(nbytes);
    }
}
332
333#[cfg(test)]
334mod t {
335 use super::*;
336
337 mod shuffle_objects {
339 use super::*;
340
341 #[test]
343 fn twobytwo() {
344 let src = [0x1234u16, 0x5678u16];
345 let dst = shuffle_objects(&src[..]);
346 cfg_if! {
347 if #[cfg(target_endian = "big")] {
348 let expected = [0x78u8, 0x34, 0x56, 0x12];
349 } else {
350 let expected = [0x34, 0x78u8, 0x12, 0x56];
351 }
352 }
353 assert_eq!(dst, &expected[..]);
354 }
355
356 #[test]
358 fn fourbyfour() {
359 let src = [0x11223344u32, 0x55667788, 0x99aabbcc, 0xddeeff00];
360 let dst = shuffle_objects(&src[..]);
361 cfg_if! {
362 if #[cfg(target_endian = "big")] {
363 let expected = [0x00u8, 0xcc, 0x88, 0x44, 0xff, 0xbb, 0x77, 0x33,
364 0xee, 0xaa, 0x66, 0x22, 0xdd, 0x99, 0x55, 0x11];
365 } else {
366 let expected = [0x44, 0x88, 0xcc, 0x00, 0x33, 0x77, 0xbb, 0xff,
367 0x22, 0x66, 0xaa, 0xee, 0x11, 0x55, 0x99, 0xdd];
368 }
369 }
370 assert_eq!(dst, &expected[..]);
371 }
372 }
373
    mod shuffle {
        // These imports are only needed by the x86-only test below.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        use rand::Rng;
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        use rstest::rstest;

        // Cross-check each x86 SIMD shuffle kernel against `generic::shuffle`
        // on random input, over a matrix of element sizes and buffer lengths.
        // Several lengths are deliberately not a multiple of the element size.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[rstest]
        #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"),
            case::sse2(crate::sse2::shuffle, is_x86_feature_detected!("sse2")))]
        #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"),
            case::avx2(crate::avx2::shuffle, is_x86_feature_detected!("avx2")))]
        #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"),
            case::avx512f(crate::avx512f::shuffle,
                is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw")
            )
        )]
        fn compare(
            #[values(2, 4, 8, 13, 16, 18, 32, 36, 43, 47)] typesize: usize,
            #[values(64, 65, 256, 258, 1024, 1028, 4096, 4112)] len: usize,
            #[case] f: unsafe fn(usize, usize, *const u8, *mut u8),
            #[case] has_feature: bool,
        ) {
            // The kernel under test cannot run on a CPU that lacks its
            // feature, so the case is reported as skipped and returns early.
            if !has_feature {
                eprintln!("Skipping: CPU feature unavailable.");
                return;
            }

            let mut rng = rand::rng();

            let src = (0..len).map(|_| rng.random()).collect::<Vec<u8>>();
            let mut generic_dst = vec![0u8; len];
            let mut opt_dst = vec![0u8; len];
            // Both destination buffers are exactly `len` bytes, matching the
            // length passed to the kernels.
            unsafe {
                crate::generic::shuffle(typesize, len, src.as_ptr(), generic_dst.as_mut_ptr());
                f(typesize, len, src.as_ptr(), opt_dst.as_mut_ptr());
            }
            assert_eq!(generic_dst, opt_dst);
        }
    }
415
416 mod unshuffle_objects {
418 use super::*;
419
420 #[test]
422 fn twobytwo() {
423 cfg_if! {
424 if #[cfg(target_endian = "big")] {
425 let src = [0x78u8, 0x34, 0x56, 0x12];
426 } else {
427 let src = [0x34, 0x78u8, 0x12, 0x56];
428 }
429 }
430 let dst = unsafe { unshuffle_objects::<u16>(&src[..]) };
431 assert_eq!(dst, &[0x1234u16, 0x5678u16][..]);
432 }
433
434 #[test]
436 fn fourbyfour() {
437 cfg_if! {
438 if #[cfg(target_endian = "big")] {
439 let src = [0x00u8, 0xcc, 0x88, 0x44, 0xff, 0xbb, 0x77, 0x33,
440 0xee, 0xaa, 0x66, 0x22, 0xdd, 0x99, 0x55, 0x11];
441 } else {
442 let src = [0x44, 0x88, 0xcc, 0x00, 0x33, 0x77, 0xbb, 0xff,
443 0x22, 0x66, 0xaa, 0xee, 0x11, 0x55, 0x99, 0xdd];
444 }
445 }
446 let dst = unsafe { unshuffle_objects::<u32>(&src[..]) };
447 assert_eq!(
448 dst,
449 &[0x11223344u32, 0x55667788, 0x99aabbcc, 0xddeeff00][..]
450 );
451 }
452 }
453
    mod unshuffle {
        use rand::Rng;
        use rstest::rstest;

        // Cross-check each x86 SIMD unshuffle kernel against
        // `generic::unshuffle` on random input, over a matrix of element sizes
        // and buffer lengths.
        // NOTE(review): the shuffle comparison test also covers typesize 13,
        // which is absent from the lists in this module -- confirm whether
        // that is intentional.
        #[rstest]
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"),
            case::sse2(crate::sse2::unshuffle, is_x86_feature_detected!("sse2")))]
        #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"),
            case::avx2(crate::avx2::unshuffle, is_x86_feature_detected!("avx2")))]
        fn compare(
            #[values(2, 4, 8, 16, 18, 32, 36, 43, 47)] typesize: usize,
            #[values(64, 65, 256, 258, 1024, 1028, 4096, 4112)] len: usize,
            #[case] f: unsafe fn(usize, usize, *const u8, *mut u8),
            #[case] has_feature: bool,
        ) {
            // The kernel under test cannot run on a CPU that lacks its
            // feature, so the case is reported as skipped and returns early.
            if !has_feature {
                eprintln!("Skipping: CPU feature unavailable.");
                return;
            }

            let mut rng = rand::rng();

            let src = (0..len).map(|_| rng.random()).collect::<Vec<u8>>();
            let mut generic_dst = vec![0u8; len];
            let mut opt_dst = vec![0u8; len];
            // Both destination buffers are exactly `len` bytes, matching the
            // length passed to the kernels.
            unsafe {
                crate::generic::unshuffle(typesize, len, src.as_ptr(), generic_dst.as_mut_ptr());
                f(typesize, len, src.as_ptr(), opt_dst.as_mut_ptr());
            }
            assert_eq!(generic_dst, opt_dst);
        }

        // Round-trip check: unshuffling the output of the generic shuffle must
        // reproduce the original random input.  Only the generic kernels are
        // used, so this runs on every architecture.
        #[rstest]
        fn inverse(
            #[values(2, 4, 8, 16, 18, 32, 36, 43, 47)] typesize: usize,
            #[values(64, 65, 256, 258, 1024, 1028, 4096, 4112)] len: usize,
        ) {
            let mut rng = rand::rng();

            let src = (0..len).map(|_| rng.random()).collect::<Vec<u8>>();
            let mut shuffled = vec![0u8; len];
            let mut dst = vec![0u8; len];
            // All three buffers are exactly `len` bytes long.
            unsafe {
                crate::generic::shuffle(typesize, len, src.as_ptr(), shuffled.as_mut_ptr());
                crate::generic::unshuffle(typesize, len, shuffled.as_ptr(), dst.as_mut_ptr());
            }
            assert_eq!(src, dst);
        }
    }
506}