pixie_anim_lib/simd/
x86_64.rs

1//! x86_64 SIMD implementations.
2
3use crate::color::Lab;
4use crate::quant::Rgb;
5use std::arch::x86_64::*;
6
/// A palette stored in planar format for SIMD efficiency.
///
/// Each Lab channel lives in its own contiguous vector, so eight consecutive
/// palette entries of one channel can be loaded into a single 256-bit register.
///
/// Invariant: `l`, `a` and `b` must each hold at least `len` elements.
/// [`PlanarLabPalette::from_lab`] establishes this; code that fills the public
/// fields by hand is responsible for keeping it true.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct PlanarLabPalette {
    /// `l` component of every palette entry, in palette order.
    pub l: Vec<f32>,
    /// `a` component of every palette entry, in palette order.
    pub a: Vec<f32>,
    /// `b` component of every palette entry, in palette order.
    pub b: Vec<f32>,
    /// Number of palette entries.
    pub len: usize,
}
14
15impl PlanarLabPalette {
16    pub fn from_lab(colors: &[Lab]) -> Self {
17        let len = colors.len();
18        let mut l = Vec::with_capacity(len);
19        let mut a = Vec::with_capacity(len);
20        let mut b = Vec::with_capacity(len);
21        for c in colors {
22            l.push(c.l);
23            a.push(c.a);
24            b.push(c.b);
25        }
26        Self { l, a, b, len }
27    }
28}
29
30/// Find nearest color in Lab space using planar SIMD (AVX2).
31#[target_feature(enable = "avx2")]
32pub unsafe fn find_nearest_color_lab_planar_avx2(pixel: Lab, palette: &PlanarLabPalette) -> usize {
33    let mut min_dist = f32::MAX;
34    let mut best_idx = 0;
35
36    let p_l = _mm256_set1_ps(pixel.l);
37    let p_a = _mm256_set1_ps(pixel.a);
38    let p_b = _mm256_set1_ps(pixel.b);
39
40    let chunks = palette.len / 8;
41
42    for i in 0..chunks {
43        let offset = i * 8;
44
45        // Load 8 components at once from planar vectors
46        let l_v = _mm256_loadu_ps(palette.l.as_ptr().add(offset));
47        let a_v = _mm256_loadu_ps(palette.a.as_ptr().add(offset));
48        let b_v = _mm256_loadu_ps(palette.b.as_ptr().add(offset));
49
50        // Calculate squared distances: (p - c)^2
51        let dl = _mm256_sub_ps(p_l, l_v);
52        let da = _mm256_sub_ps(p_a, a_v);
53        let db = _mm256_sub_ps(p_b, b_v);
54
55        let dist_v = _mm256_add_ps(
56            _mm256_add_ps(_mm256_mul_ps(dl, dl), _mm256_mul_ps(da, da)),
57            _mm256_mul_ps(db, db),
58        );
59
60        // Extract and compare
61        let mut dists = [0.0f32; 8];
62        _mm256_storeu_ps(dists.as_mut_ptr(), dist_v);
63
64        for (j, &d) in dists.iter().enumerate() {
65            if d < min_dist {
66                min_dist = d;
67                best_idx = offset + j;
68            }
69        }
70    }
71
72    // Handle remainder
73    for i in (chunks * 8)..palette.len {
74        let dl = pixel.l - palette.l[i];
75        let da = pixel.a - palette.a[i];
76        let db = pixel.b - palette.b[i];
77        let dist = dl * dl + da * da + db * db;
78        if dist < min_dist {
79            min_dist = dist;
80            best_idx = i;
81        }
82    }
83
84    best_idx
85}
86
87/// Find nearest color using AVX2 (Legacy interleaved version, currently fallback to scalar logic)
88#[target_feature(enable = "avx2")]
89pub unsafe fn find_nearest_color_avx2(pixel: Rgb, palette: &[Rgb]) -> usize {
90    let mut min_dist = u32::MAX;
91    let mut best_idx = 0;
92
93    let r_pixel = _mm256_set1_epi32(pixel.r as i32);
94    let g_pixel = _mm256_set1_epi32(pixel.g as i32);
95    let b_pixel = _mm256_set1_epi32(pixel.b as i32);
96
97    let mut i = 0;
98
99    // We process in smaller chunks if needed, but for 256 colors,
100    // a straightforward loop is often fine if we avoid the extra array copies.
101    for (idx, &color) in palette.iter().enumerate() {
102        let dr = pixel.r as i32 - color.r as i32;
103        let dg = pixel.g as i32 - color.g as i32;
104        let db = pixel.b as i32 - color.b as i32;
105        let dist = (dr * dr + dg * dg + db * db) as u32;
106
107        if dist < min_dist {
108            min_dist = dist;
109            best_idx = idx;
110        }
111    }
112
113    // NOTE: The previous SIMD version was slower due to array copies.
114    // A truly fast SIMD version for RGB distance requires clever shuffles
115    // or a Planar palette layout. For now, we use the scalar path
116    // which is already very fast (250ns for 256 colors) until we implement
117    // the Planar palette optimization.
118
119    best_idx
120}