use derive_more::{Display, IsVariant};
/// Selects which quarter-turn transform the `rotate_buf_u8*` routines apply.
///
/// Every variant swaps the image's width and height: the destination rows are
/// `src_h` pixels long. Each variant's note gives the destination pixel
/// `(nx, ny)` that source pixel `(x, y)` is copied to for a `src_w` x `src_h`
/// source, exactly as computed in `rotate_buf_u8_scalar`.
///
/// `Display` prints the same lowercase tag that `as_str` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Display, IsVariant)]
#[display("{}", self.as_str())]
pub enum RotateKind {
/// `(x, y)` -> `(src_h - 1 - y, x)`: a 90-degree clockwise turn when rows are
/// stored top-to-bottom.
Rotate90,
/// `(x, y)` -> `(y, src_w - 1 - x)`: a 90-degree counter-clockwise turn when
/// rows are stored top-to-bottom.
Rotate270,
/// `(x, y)` -> `(y, x)`: a transpose, i.e. the `Rotate90` result mirrored
/// left-to-right.
Rotate90FlipH,
/// `(x, y)` -> `(src_h - 1 - y, src_w - 1 - x)`: an anti-diagonal transpose,
/// i.e. the `Rotate270` result mirrored left-to-right.
Rotate270FlipH,
}
impl RotateKind {
pub const fn as_str(&self) -> &'static str {
match self {
Self::Rotate90 => "rotate90",
Self::Rotate270 => "rotate270",
Self::Rotate90FlipH => "rotate90fliph",
Self::Rotate270FlipH => "rotate270fliph",
}
}
}
/// Portable reference kernel: rotates an interleaved `u8` pixel buffer by a
/// quarter turn.
///
/// `src` holds `src_h` rows of `src_w` pixels, each pixel `channels` bytes
/// long. `dst` receives the same pixels laid out in rows of `src_h` pixels
/// (width and height are swapped). Source pixel `(x, y)` lands at:
///
/// * `Rotate90`       -> `(src_h - 1 - y, x)`
/// * `Rotate270`      -> `(y, src_w - 1 - x)`
/// * `Rotate90FlipH`  -> `(y, x)`
/// * `Rotate270FlipH` -> `(src_h - 1 - y, src_w - 1 - x)`
///
/// # Panics
///
/// Panics if `src_w * src_h * channels` overflows `usize`, or if `src.len()`
/// or `dst.len()` differs from that product.
#[inline]
#[doc(hidden)]
pub fn rotate_buf_u8_scalar(
    dst: &mut [u8],
    src: &[u8],
    src_w: usize,
    src_h: usize,
    channels: usize,
    rotation: RotateKind,
) {
    let elements = src_w
        .checked_mul(src_h)
        .and_then(|wh| wh.checked_mul(channels))
        .unwrap_or_else(|| {
            panic!("rotate_buf_u8_scalar: dimensions {src_w}x{src_h}x{channels} overflow usize")
        });
    assert_eq!(
        src.len(),
        elements,
        "rotate_buf_u8_scalar: src.len() ({}) must equal src_w * src_h * channels ({} * {} * {} = {})",
        src.len(),
        src_w,
        src_h,
        channels,
        elements,
    );
    assert_eq!(
        dst.len(),
        elements,
        "rotate_buf_u8_scalar: dst.len() ({}) must equal src.len() ({})",
        dst.len(),
        elements,
    );

    // An empty buffer has nothing to move. Returning here avoids walking
    // `src_w * src_h` no-op iterations when one factor is zero but the others
    // are large, and guarantees the chunk sizes below are non-zero.
    if elements == 0 {
        return;
    }

    let out_w = src_h;
    // Cannot overflow: `src_w * channels <= elements` because `src_h >= 1`.
    let row_len = src_w * channels;
    for (y, row) in src.chunks_exact(row_len).enumerate() {
        for (x, pixel) in row.chunks_exact(channels).enumerate() {
            let (nx, ny) = match rotation {
                RotateKind::Rotate90 => (src_h - 1 - y, x),
                RotateKind::Rotate270 => (y, src_w - 1 - x),
                RotateKind::Rotate90FlipH => (y, x),
                RotateKind::Rotate270FlipH => (src_h - 1 - y, src_w - 1 - x),
            };
            let dst_off = (ny * out_w + nx) * channels;
            dst[dst_off..dst_off + channels].copy_from_slice(pixel);
        }
    }
}
/// Rotates a 4-channel `u8` pixel buffer by a quarter turn, reading four
/// source pixels (16 bytes) per NEON load while a row has at least four
/// pixels left, then finishing the row one pixel at a time.
///
/// Source pixel `(x, y)` is copied to destination pixel `(nx, ny)` in a
/// destination whose rows are `src_h` pixels long; `rotation` selects the
/// mapping through the `match` arms in the body.
///
/// # Panics
///
/// Panics if `src_w * src_h * 4` overflows `usize`, or if `src.len()` or
/// `dst.len()` differs from that product.
///
/// # Safety
///
/// The function is compiled with `#[target_feature(enable = "neon")]`; the
/// caller must ensure the executing CPU supports NEON.
#[cfg(target_arch = "aarch64")]
#[inline]
#[target_feature(enable = "neon")]
unsafe fn rotate_buf_u8_channels4_neon(
dst: &mut [u8],
src: &[u8],
src_w: usize,
src_h: usize,
rotation: RotateKind,
) {
// This kernel is specialised for exactly four bytes per pixel.
let channels = 4usize;
let elements = src_w
.checked_mul(src_h)
.and_then(|wh| wh.checked_mul(channels))
.unwrap_or_else(|| {
panic!("rotate_buf_u8_channels4_neon: dimensions {src_w}x{src_h}x4 overflow usize")
});
assert_eq!(
src.len(),
elements,
"rotate_buf_u8_channels4_neon: src.len() ({}) must equal src_w * src_h * 4 ({} * {} * 4 = {})",
src.len(),
src_w,
src_h,
elements,
);
assert_eq!(
dst.len(),
elements,
"rotate_buf_u8_channels4_neon: dst.len() ({}) must equal src.len() ({})",
dst.len(),
elements,
);
// Destination rows are `src_h` pixels long: width and height are swapped.
let out_w = src_h;
// SAFETY: `src` and `dst` were both asserted above to hold exactly
// `src_w * src_h * 4` bytes. Every source offset addresses pixel `(x, y)`
// with `y < src_h` and `x < src_w` (the 16-byte load only runs while
// `x + 4 <= src_w`), and every destination offset addresses pixel
// `(nx, ny)` with `nx < src_h` and `ny < src_w`, so each 4- or 16-byte
// access stays inside its slice. Unaligned reads/writes are used because
// byte slices carry no alignment guarantee beyond 1.
unsafe {
let src_base = src.as_ptr();
let dst_base = dst.as_mut_ptr();
for y in 0..src_h {
// Largest multiple of 4 not exceeding `src_w`; pixels past it are handled
// by the one-at-a-time loop further down.
let row_x = src_w - (src_w % 4);
let mut x = 0usize;
while x + 4 <= row_x {
let src_off = (y * src_w + x) * channels;
// Load four consecutive 4-byte pixels and spill them to a stack array so
// each one can be read back individually.
let tile = core::arch::aarch64::vld1q_u8(src_base.add(src_off));
let mut scratch = [0u8; 16];
core::arch::aarch64::vst1q_u8(scratch.as_mut_ptr(), tile);
for lane in 0..4 {
// Column of the source pixel held in this lane.
let xx = x + lane;
let (nx, ny) = match rotation {
RotateKind::Rotate90 => (src_h - 1 - y, xx),
RotateKind::Rotate270 => (y, src_w - 1 - xx),
RotateKind::Rotate90FlipH => (y, xx),
RotateKind::Rotate270FlipH => (src_h - 1 - y, src_w - 1 - xx),
};
let dst_off = (ny * out_w + nx) * channels;
// Move the whole pixel as a single 32-bit word.
let pixel: u32 = core::ptr::read_unaligned(scratch.as_ptr().add(lane * 4).cast::<u32>());
core::ptr::write_unaligned(dst_base.add(dst_off).cast::<u32>(), pixel);
}
x += 4;
}
// Remaining `src_w % 4` pixels of the row, copied straight from `src`.
while x < src_w {
let (nx, ny) = match rotation {
RotateKind::Rotate90 => (src_h - 1 - y, x),
RotateKind::Rotate270 => (y, src_w - 1 - x),
RotateKind::Rotate90FlipH => (y, x),
RotateKind::Rotate270FlipH => (src_h - 1 - y, src_w - 1 - x),
};
let src_off = (y * src_w + x) * channels;
let dst_off = (ny * out_w + nx) * channels;
let pixel: u32 = core::ptr::read_unaligned(src_base.add(src_off).cast::<u32>());
core::ptr::write_unaligned(dst_base.add(dst_off).cast::<u32>(), pixel);
x += 1;
}
}
}
}
/// Rotates an interleaved `u8` pixel buffer by a quarter turn, dispatching to
/// a NEON kernel when one applies.
///
/// The buffer sizes are validated here first, so size errors are reported
/// under this function's own name. On `aarch64`, 4-channel input is then
/// handed to `rotate_buf_u8_channels4_neon` when
/// `crate::simd::is_neon_available()` returns `true`; every other case runs
/// `rotate_buf_u8_scalar`.
///
/// # Panics
///
/// Panics if `src_w * src_h * channels` overflows `usize`, or if `src.len()`
/// or `dst.len()` differs from that product.
#[inline]
#[doc(hidden)]
pub fn rotate_buf_u8(
    dst: &mut [u8],
    src: &[u8],
    src_w: usize,
    src_h: usize,
    channels: usize,
    rotation: RotateKind,
) {
    let pixel_count = src_w.checked_mul(src_h);
    let elements = match pixel_count.and_then(|area| area.checked_mul(channels)) {
        Some(total) => total,
        None => {
            panic!("simd::vlm::rotate_buf_u8: dimensions {src_w}x{src_h}x{channels} overflow usize")
        }
    };

    let src_len = src.len();
    assert_eq!(
        src_len, elements,
        "simd::vlm::rotate_buf_u8: src.len() ({}) must equal src_w * src_h * channels ({} * {} * {} = {})",
        src_len, src_w, src_h, channels, elements,
    );

    let dst_len = dst.len();
    assert_eq!(
        dst_len, elements,
        "simd::vlm::rotate_buf_u8: dst.len() ({}) must equal src.len() ({})",
        dst_len, elements,
    );

    #[cfg(target_arch = "aarch64")]
    {
        // The availability probe is only consulted for 4-channel input.
        let take_neon_path = channels == 4 && crate::simd::is_neon_available();
        if take_neon_path {
            // SAFETY: the callee's only requirement is the `neon` target
            // feature, and this branch is reached only after
            // `is_neon_available()` (assumed to report runtime NEON support)
            // returned `true`.
            unsafe { rotate_buf_u8_channels4_neon(dst, src, src_w, src_h, rotation) };
            return;
        }
    }

    rotate_buf_u8_scalar(dst, src, src_w, src_h, channels, rotation);
}
#[cfg(test)]
mod tests {
    use super::{RotateKind, rotate_buf_u8, rotate_buf_u8_scalar};

    /// Signature shared by the dispatcher and the scalar reference kernel.
    type Kernel = fn(&mut [u8], &[u8], usize, usize, usize, RotateKind);

    /// Every rotation variant, in declaration order.
    const ALL_ROTATIONS: [RotateKind; 4] = [
        RotateKind::Rotate90,
        RotateKind::Rotate270,
        RotateKind::Rotate90FlipH,
        RotateKind::Rotate270FlipH,
    ];

    /// Builds a `w * h * channels` byte buffer filled with `index % 251`.
    fn patterned(w: usize, h: usize, channels: usize) -> Vec<u8> {
        let len = w * h * channels;
        (0..len).map(|i| (i % 251) as u8).collect()
    }

    /// Runs `kernel` over a freshly patterned source and returns the output.
    fn rotated_by(
        kernel: Kernel,
        w: usize,
        h: usize,
        channels: usize,
        rotation: RotateKind,
    ) -> Vec<u8> {
        let input = patterned(w, h, channels);
        let mut output = vec![0u8; input.len()];
        kernel(&mut output, &input, w, h, channels, rotation);
        output
    }

    /// The dispatcher must agree byte-for-byte with the scalar kernel for
    /// 4-channel input, including widths that are not multiples of four.
    #[test]
    fn rotate_buf_u8_channels4_scalar_matches_dispatcher_exact() {
        for w in [1usize, 4, 5, 7, 8, 16, 17, 33] {
            for h in [1usize, 2, 4, 8, 17] {
                for rotation in ALL_ROTATIONS {
                    let s = rotated_by(rotate_buf_u8_scalar, w, h, 4, rotation);
                    let d = rotated_by(rotate_buf_u8, w, h, 4, rotation);
                    assert_eq!(
                        s, d,
                        "Exact mismatch (w={w}, h={h}, channels=4, rotation={rotation:?})"
                    );
                }
            }
        }
    }

    /// Same agreement check for 3-channel input.
    #[test]
    fn rotate_buf_u8_channels3_scalar_matches_dispatcher_exact() {
        for w in [1usize, 4, 17] {
            for h in [1usize, 4, 8] {
                for rotation in ALL_ROTATIONS {
                    let s = rotated_by(rotate_buf_u8_scalar, w, h, 3, rotation);
                    let d = rotated_by(rotate_buf_u8, w, h, 3, rotation);
                    assert_eq!(
                        s, d,
                        "Exact mismatch (w={w}, h={h}, channels=3, rotation={rotation:?})"
                    );
                }
            }
        }
    }

    /// Pins the exact destination of every pixel of a 2x2 image for `Rotate90`.
    #[test]
    fn rotate_buf_u8_rotate90_pin() {
        let (w, h, channels) = (2, 2, 4);
        let s: Vec<u8> = (0u8..16).collect();
        let mut d = vec![0u8; 16];
        rotate_buf_u8(&mut d, &s, w, h, channels, RotateKind::Rotate90);

        // (destination byte offset, source byte offset, failure label)
        let moves = [
            (4, 0, "Rotate90: src(0,0) → dst[4..8]"),
            (12, 4, "Rotate90: src(1,0) → dst[12..16]"),
            (0, 8, "Rotate90: src(0,1) → dst[0..4]"),
            (8, 12, "Rotate90: src(1,1) → dst[8..12]"),
        ];
        for (dst_at, src_at, label) in moves {
            assert_eq!(&d[dst_at..dst_at + 4], &s[src_at..src_at + 4], "{label}");
        }
    }

    /// `Rotate90` followed by `Rotate270` (with swapped dimensions) restores
    /// the original buffer.
    #[test]
    fn rotate_buf_u8_double_rotate_round_trip() {
        let (w, h, channels) = (4, 3, 4);
        let s = patterned(w, h, channels);

        let mut once = vec![0u8; s.len()];
        rotate_buf_u8(&mut once, &s, w, h, channels, RotateKind::Rotate90);

        let mut twice = vec![0u8; s.len()];
        rotate_buf_u8(&mut twice, &once, h, w, channels, RotateKind::Rotate270);

        assert_eq!(twice, s, "Rotate90 ∘ Rotate270 should be identity");
    }

    #[test]
    #[should_panic(
        expected = "simd::vlm::rotate_buf_u8: src.len() (3) must equal src_w * src_h * channels"
    )]
    fn rotate_buf_u8_panics_on_size_mismatch() {
        let s = vec![0u8; 3];
        let mut d = vec![0u8; 16];
        rotate_buf_u8(&mut d, &s, 2, 2, 4, RotateKind::Rotate90);
    }

    #[test]
    #[should_panic(expected = "overflow usize")]
    fn rotate_buf_u8_panics_on_dimension_overflow() {
        let s = vec![0u8; 16];
        let mut d = vec![0u8; 16];
        rotate_buf_u8(&mut d, &s, usize::MAX / 2 + 1, 2, 4, RotateKind::Rotate90);
    }

    /// For a 2x2 image `Rotate90FlipH` is a plain transpose: the diagonal
    /// pixels stay put and the off-diagonal pixels trade places.
    #[test]
    fn rotate_buf_u8_rotate90_flip_h_collapses() {
        let (w, h, channels) = (2, 2, 4);
        let s: Vec<u8> = (0u8..16).collect();
        let mut d = vec![0u8; 16];
        rotate_buf_u8(&mut d, &s, w, h, channels, RotateKind::Rotate90FlipH);

        // (destination byte offset, source byte offset)
        for (dst_at, src_at) in [(0, 0), (8, 4), (4, 8), (12, 12)] {
            assert_eq!(&d[dst_at..dst_at + 4], &s[src_at..src_at + 4]);
        }
    }

    /// `as_str` and `Display` must both yield the lowercase tag.
    #[test]
    fn rotate_kind_as_str_and_display_all_variants() {
        let expected_tags = ["rotate90", "rotate270", "rotate90fliph", "rotate270fliph"];
        for (kind, tag) in ALL_ROTATIONS.into_iter().zip(expected_tags) {
            assert_eq!(kind.as_str(), tag, "as_str mismatch for {kind:?}");
            assert_eq!(kind.to_string(), tag, "Display mismatch for {kind:?}");
        }
    }

    #[test]
    #[should_panic(expected = "rotate_buf_u8_scalar: dimensions")]
    fn rotate_buf_u8_scalar_panics_on_dimension_overflow() {
        let s = vec![0u8; 16];
        let mut d = vec![0u8; 16];
        rotate_buf_u8_scalar(&mut d, &s, usize::MAX / 2 + 1, 2, 4, RotateKind::Rotate90);
    }

    #[test]
    #[should_panic(
        expected = "rotate_buf_u8_scalar: src.len() (3) must equal src_w * src_h * channels (2 * 2 * 4 = 16)"
    )]
    fn rotate_buf_u8_scalar_panics_on_src_size_mismatch() {
        let s = vec![0u8; 3];
        let mut d = vec![0u8; 16];
        rotate_buf_u8_scalar(&mut d, &s, 2, 2, 4, RotateKind::Rotate90);
    }

    #[test]
    #[should_panic(expected = "rotate_buf_u8_scalar: dst.len() (3) must equal src.len() (16)")]
    fn rotate_buf_u8_scalar_panics_on_dst_size_mismatch() {
        let s = vec![0u8; 16];
        let mut d = vec![0u8; 3];
        rotate_buf_u8_scalar(&mut d, &s, 2, 2, 4, RotateKind::Rotate90);
    }

    #[test]
    #[should_panic(expected = "simd::vlm::rotate_buf_u8: dst.len() (3) must equal src.len() (16)")]
    fn rotate_buf_u8_dispatch_panics_on_dst_size_mismatch() {
        let s = vec![0u8; 16];
        let mut d = vec![0u8; 3];
        rotate_buf_u8(&mut d, &s, 2, 2, 4, RotateKind::Rotate90);
    }

    // The NEON panic tests below "skip" on hardware without NEON by panicking
    // with the expected message themselves, so `should_panic` still passes.

    #[cfg(target_arch = "aarch64")]
    #[test]
    #[should_panic(expected = "rotate_buf_u8_channels4_neon: dimensions")]
    fn rotate_buf_u8_neon_panics_on_dimension_overflow() {
        if !crate::simd::is_neon_available() {
            panic!("rotate_buf_u8_channels4_neon: dimensions (skipped — NEON unavailable)");
        }
        let s = vec![0u8; 16];
        let mut d = vec![0u8; 16];
        let too_wide = usize::MAX / 2 + 1;
        unsafe {
            super::rotate_buf_u8_channels4_neon(&mut d, &s, too_wide, 2, RotateKind::Rotate90)
        };
    }

    #[cfg(target_arch = "aarch64")]
    #[test]
    #[should_panic(
        expected = "rotate_buf_u8_channels4_neon: src.len() (3) must equal src_w * src_h * 4 (2 * 2 * 4 = 16)"
    )]
    fn rotate_buf_u8_neon_panics_on_src_size_mismatch() {
        if !crate::simd::is_neon_available() {
            panic!(
                "rotate_buf_u8_channels4_neon: src.len() (3) must equal src_w * src_h * 4 (2 * 2 * 4 = 16) (skipped — NEON unavailable)"
            );
        }
        let s = vec![0u8; 3];
        let mut d = vec![0u8; 16];
        unsafe { super::rotate_buf_u8_channels4_neon(&mut d, &s, 2, 2, RotateKind::Rotate90) };
    }

    #[cfg(target_arch = "aarch64")]
    #[test]
    #[should_panic(
        expected = "rotate_buf_u8_channels4_neon: dst.len() (3) must equal src.len() (16)"
    )]
    fn rotate_buf_u8_neon_panics_on_dst_size_mismatch() {
        if !crate::simd::is_neon_available() {
            panic!(
                "rotate_buf_u8_channels4_neon: dst.len() (3) must equal src.len() (16) (skipped — NEON unavailable)"
            );
        }
        let s = vec![0u8; 16];
        let mut d = vec![0u8; 3];
        unsafe { super::rotate_buf_u8_channels4_neon(&mut d, &s, 2, 2, RotateKind::Rotate90) };
    }

    /// Calls the NEON kernel directly and checks it against the scalar kernel
    /// across widths on both sides of the four-pixel block boundary.
    #[cfg(target_arch = "aarch64")]
    #[test]
    fn rotate_buf_u8_neon_matches_scalar_bit_identical() {
        if !crate::simd::is_neon_available() {
            return;
        }
        let channels = 4usize;
        for w in [1usize, 2, 3, 4, 5, 7, 8, 9, 16, 17] {
            for h in [1usize, 2, 3, 5, 8] {
                for rotation in ALL_ROTATIONS {
                    let s = patterned(w, h, channels);

                    let mut scalar = vec![0u8; s.len()];
                    rotate_buf_u8_scalar(&mut scalar, &s, w, h, channels, rotation);

                    let mut neon = vec![0u8; s.len()];
                    unsafe { super::rotate_buf_u8_channels4_neon(&mut neon, &s, w, h, rotation) };

                    assert_eq!(
                        neon, scalar,
                        "NEON vs scalar Exact mismatch (w={w}, h={h}, channels=4, rotation={rotation:?})"
                    );
                }
            }
        }
    }
}