Skip to main content

oximedia_gpu/
color_convert_kernel.rs

1//! GPU color space conversion kernels (CPU simulation).
2//!
3//! Provides batch color space conversion operations simulated with Rayon
4//! parallelism, matching GPU compute-shader semantics.
5//!
6//! Supported conversions:
7//! - RGB ↔ YUV with BT.601, BT.709, BT.2020 coefficients
8//! - Limited range ↔ full range (studio swing ↔ JPEG)
9//! - Packed RGBA ↔ planar YUV (4:4:4)
10//!
11//! # Example
12//!
13//! ```rust
14//! use oximedia_gpu::color_convert_kernel::{ColorConvertKernel, ColorStandard, RangeMode};
15//!
16//! let input = vec![235u8, 128, 44, 255]; // 1×1 RGBA pixel
17//! let mut output = vec![0u8; 4];         // 1×1 YUVA output
18//! ColorConvertKernel::rgb_to_yuv(&input, &mut output, 1, 1, ColorStandard::Bt709, RangeMode::Full)
19//!     .expect("conversion failed");
20//! ```
21
22use rayon::prelude::*;
23use thiserror::Error;
24
25// ─── Error ────────────────────────────────────────────────────────────────────
26
27/// Errors returned by color conversion kernel operations.
28#[derive(Debug, Clone, PartialEq, Error)]
29pub enum ColorKernelError {
30    /// Source or destination buffer has incorrect length.
31    #[error("Buffer size mismatch: expected {expected}, got {actual}")]
32    BufferSizeMismatch { expected: usize, actual: usize },
33    /// Image dimensions are invalid (zero width or height).
34    #[error("Invalid dimensions: {width}x{height}")]
35    InvalidDimensions { width: u32, height: u32 },
36    /// Pixel count overflows usize.
37    #[error("Pixel count overflow for {width}x{height}")]
38    PixelCountOverflow { width: u32, height: u32 },
39}
40
41// ─── ColorStandard ────────────────────────────────────────────────────────────
42
/// Selects which set of YCbCr luminance coefficients a conversion uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ColorStandard {
    /// ITU-R BT.601 (SD television, SDTV)
    Bt601,
    /// ITU-R BT.709 (HD television, sRGB)
    Bt709,
    /// ITU-R BT.2020 (Ultra-HD, HDR10)
    Bt2020,
}

impl ColorStandard {
    /// Returns the luminance coefficients as a `(Kr, Kb)` pair.
    ///
    /// The green coefficient is not stored; callers derive it as
    /// `Kg = 1 - Kr - Kb`.
    #[must_use]
    pub fn kr_kb(self) -> (f32, f32) {
        let red_weight = match self {
            Self::Bt601 => 0.299,
            Self::Bt709 => 0.2126,
            Self::Bt2020 => 0.2627,
        };
        let blue_weight = match self {
            Self::Bt601 => 0.114,
            Self::Bt709 => 0.0722,
            Self::Bt2020 => 0.0593,
        };
        (red_weight, blue_weight)
    }

    /// Short display name of the standard, e.g. `"BT.709"`.
    #[must_use]
    pub fn label(self) -> &'static str {
        match self {
            Self::Bt2020 => "BT.2020",
            Self::Bt709 => "BT.709",
            Self::Bt601 => "BT.601",
        }
    }
}
77
78// ─── RangeMode ───────────────────────────────────────────────────────────────
79
/// Quantization range applied to the 8-bit Y/Cb/Cr code values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RangeMode {
    /// Full range: Y and Cb/Cr both span `[0, 255]`; chroma is centred on 128.
    Full,
    /// Limited (studio) range: Y spans `[16, 235]`, Cb/Cr span `[16, 240]`;
    /// chroma is centred on 128.
    Limited,
}
88
89// ─── ConversionMatrix ─────────────────────────────────────────────────────────
90
/// Pair of 3×3 matrices (RGB→YCbCr and YCbCr→RGB) together with the
/// offsets and scale factors that map between normalised values and 8-bit
/// code values for a given [`ColorStandard`] and [`RangeMode`].
#[derive(Debug, Clone)]
pub struct ConversionMatrix {
    /// RGB→YCbCr coefficients, row-major (rows: Y, Cb, Cr).
    pub fwd: [f32; 9],
    /// YCbCr→RGB coefficients, row-major (rows: R, G, B).
    pub inv: [f32; 9],
    /// Offset added to Y after the forward multiply (0 for full range, 16 for limited).
    pub y_bias: f32,
    /// Offset added to Cb/Cr after the forward multiply (128 in both ranges).
    pub c_bias: f32,
    /// Offset subtracted from Y before the inverse multiply.
    pub y_input_bias: f32,
    /// Y scale factor (1.0 for full range, 219/255 for limited).
    pub y_scale: f32,
    /// Cb/Cr scale factor (1.0 for full range, 224/255 for limited).
    pub c_scale: f32,
}
110
111impl ConversionMatrix {
112    /// Build a conversion matrix for the given color standard and range mode.
113    #[must_use]
114    pub fn new(standard: ColorStandard, range: RangeMode) -> Self {
115        let (kr, kb) = standard.kr_kb();
116        let kg = 1.0 - kr - kb;
117
118        // Forward matrix: RGB → YCbCr (normalised, 0..1 inputs → 0..1 outputs)
119        // Y  =  Kr·R + Kg·G + Kb·B
120        // Cb = (B - Y) / (2·(1 - Kb))
121        // Cr = (R - Y) / (2·(1 - Kr))
122        let cb_scale = 0.5 / (1.0 - kb);
123        let cr_scale = 0.5 / (1.0 - kr);
124
125        // Row 0: Y
126        let f0 = kr;
127        let f1 = kg;
128        let f2 = kb;
129        // Row 1: Cb
130        let f3 = -kr * cb_scale;
131        let f4 = -kg * cb_scale;
132        let f5 = (1.0 - kb) * cb_scale;
133        // Row 2: Cr
134        let f6 = (1.0 - kr) * cr_scale;
135        let f7 = -kg * cr_scale;
136        let f8 = -kb * cr_scale;
137
138        let fwd = [f0, f1, f2, f3, f4, f5, f6, f7, f8];
139
140        // Inverse matrix: YCbCr → RGB (normalised)
141        // R = Y                + Cr / cr_scale
142        // G = Y - Cb * (Kb / Kg) * cb_scale_inv - Cr * (Kr / Kg) * cr_scale_inv
143        // B = Y + Cb / cb_scale
144        let cb_scale_inv = 2.0 * (1.0 - kb);
145        let cr_scale_inv = 2.0 * (1.0 - kr);
146        // Row 0: R = Y + 0·Cb + Cr·cr_scale_inv
147        let i0 = 1.0_f32;
148        let i1 = 0.0_f32;
149        let i2 = cr_scale_inv;
150        // Row 1: G = Y - Cb·(kb*(2*(1-kb))/kg) - Cr·(kr*(2*(1-kr))/kg)
151        let i3 = 1.0_f32;
152        let i4 = -(kb * cb_scale_inv) / kg;
153        let i5 = -(kr * cr_scale_inv) / kg;
154        // Row 2: B = Y + Cb·cb_scale_inv + 0·Cr
155        let i6 = 1.0_f32;
156        let i7 = cb_scale_inv;
157        let i8 = 0.0_f32;
158
159        let inv = [i0, i1, i2, i3, i4, i5, i6, i7, i8];
160
161        let (y_bias, c_bias, y_input_bias, y_scale, c_scale) = match range {
162            RangeMode::Full => (0.0, 128.0, 0.0, 1.0, 1.0),
163            RangeMode::Limited => (16.0, 128.0, 16.0, 219.0 / 255.0, 224.0 / 255.0),
164        };
165
166        Self {
167            fwd,
168            inv,
169            y_bias,
170            c_bias,
171            y_input_bias,
172            y_scale,
173            c_scale,
174        }
175    }
176}
177
178// ─── BatchConvertStats ────────────────────────────────────────────────────────
179
/// Summary counters returned by every batch conversion.
#[derive(Debug, Clone, Default)]
pub struct BatchConvertStats {
    /// How many pixels the operation covered.
    pub pixels_processed: u64,
    /// How many pixels the operation reported as needing a clamp.
    pub clamped_count: u64,
}
188
189// ─── ColorConvertKernel ───────────────────────────────────────────────────────
190
191/// GPU-style color space conversion kernel (CPU simulation via Rayon).
192///
193/// All operations work on packed RGBA (4 bytes per pixel) buffers.
194/// The alpha channel is always passed through unchanged.
195#[derive(Debug, Clone)]
196pub struct ColorConvertKernel {
197    standard: ColorStandard,
198    range: RangeMode,
199    matrix: ConversionMatrix,
200}
201
202impl ColorConvertKernel {
203    /// Create a new kernel with the given color standard and range mode.
204    #[must_use]
205    pub fn new(standard: ColorStandard, range: RangeMode) -> Self {
206        let matrix = ConversionMatrix::new(standard, range);
207        Self {
208            standard,
209            range,
210            matrix,
211        }
212    }
213
214    /// The color standard used by this kernel.
215    #[must_use]
216    pub fn standard(&self) -> ColorStandard {
217        self.standard
218    }
219
220    /// The range mode used by this kernel.
221    #[must_use]
222    pub fn range(&self) -> RangeMode {
223        self.range
224    }
225
226    // ── Static helpers ────────────────────────────────────────────────────────
227
228    /// Validate packed-RGBA buffer dimensions.
229    fn validate_rgba(buf: &[u8], width: u32, height: u32) -> Result<usize, ColorKernelError> {
230        if width == 0 || height == 0 {
231            return Err(ColorKernelError::InvalidDimensions { width, height });
232        }
233        let pixels = (width as usize)
234            .checked_mul(height as usize)
235            .ok_or(ColorKernelError::PixelCountOverflow { width, height })?;
236        let expected = pixels * 4;
237        if buf.len() != expected {
238            return Err(ColorKernelError::BufferSizeMismatch {
239                expected,
240                actual: buf.len(),
241            });
242        }
243        Ok(pixels)
244    }
245
246    // ── RGB → YUV (packed) ────────────────────────────────────────────────────
247
248    /// Convert packed RGBA → packed YUVA (in-place style: src/dst separate).
249    ///
250    /// The A channel is passed through unchanged.
251    ///
252    /// # Errors
253    ///
254    /// Returns [`ColorKernelError`] if buffer lengths or dimensions are invalid.
255    pub fn rgb_to_yuv(
256        src: &[u8],
257        dst: &mut [u8],
258        width: u32,
259        height: u32,
260        standard: ColorStandard,
261        range: RangeMode,
262    ) -> Result<BatchConvertStats, ColorKernelError> {
263        Self::validate_rgba(src, width, height)?;
264        let pixels = Self::validate_rgba(dst, width, height)?;
265        let matrix = ConversionMatrix::new(standard, range);
266
267        // Parallel chunk processing: 4 bytes per pixel.
268        let clamped = std::sync::atomic::AtomicU64::new(0);
269        src.par_chunks(4)
270            .zip(dst.par_chunks_mut(4))
271            .for_each(|(s, d)| {
272                let r = s[0] as f32 / 255.0;
273                let g = s[1] as f32 / 255.0;
274                let b = s[2] as f32 / 255.0;
275                let m = &matrix.fwd;
276
277                // Y (luma)
278                let y_norm = m[0] * r + m[1] * g + m[2] * b;
279                // Cb (blue-difference chroma)
280                let cb_norm = m[3] * r + m[4] * g + m[5] * b;
281                // Cr (red-difference chroma)
282                let cr_norm = m[6] * r + m[7] * g + m[8] * b;
283
284                let y_raw = y_norm * matrix.y_scale * 255.0 + matrix.y_bias;
285                let cb_raw = cb_norm * matrix.c_scale * 255.0 + matrix.c_bias;
286                let cr_raw = cr_norm * matrix.c_scale * 255.0 + matrix.c_bias;
287
288                let (y_clamped, cb_clamped, cr_clamped) = clamp3(y_raw, cb_raw, cr_raw);
289
290                d[0] = y_clamped;
291                d[1] = cb_clamped;
292                d[2] = cr_clamped;
293                d[3] = s[3]; // alpha pass-through
294
295                let needs_clamp = y_clamped != y_raw.round() as u8
296                    || cb_clamped != cb_raw.round() as u8
297                    || cr_clamped != cr_raw.round() as u8;
298                if needs_clamp {
299                    clamped.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
300                }
301            });
302
303        Ok(BatchConvertStats {
304            pixels_processed: pixels as u64,
305            clamped_count: clamped.load(std::sync::atomic::Ordering::Relaxed),
306        })
307    }
308
309    /// The cached [`ConversionMatrix`] for this kernel's standard and range.
310    #[must_use]
311    pub fn matrix(&self) -> &ConversionMatrix {
312        &self.matrix
313    }
314
315    /// Instance method variant of [`rgb_to_yuv`].
316    ///
317    /// Uses the pre-built cached matrix rather than constructing a new one.
318    ///
319    /// [`rgb_to_yuv`]: ColorConvertKernel::rgb_to_yuv
320    pub fn convert_rgb_to_yuv(
321        &self,
322        src: &[u8],
323        dst: &mut [u8],
324        width: u32,
325        height: u32,
326    ) -> Result<BatchConvertStats, ColorKernelError> {
327        Self::validate_rgba(src, width, height)?;
328        let pixels = Self::validate_rgba(dst, width, height)?;
329        let matrix = &self.matrix;
330        let clamped = std::sync::atomic::AtomicU64::new(0);
331        src.par_chunks(4)
332            .zip(dst.par_chunks_mut(4))
333            .for_each(|(s, d)| {
334                let r = s[0] as f32 / 255.0;
335                let g = s[1] as f32 / 255.0;
336                let b = s[2] as f32 / 255.0;
337                let m = &matrix.fwd;
338                let y_raw =
339                    (m[0] * r + m[1] * g + m[2] * b) * matrix.y_scale * 255.0 + matrix.y_bias;
340                let cb_raw =
341                    (m[3] * r + m[4] * g + m[5] * b) * matrix.c_scale * 255.0 + matrix.c_bias;
342                let cr_raw =
343                    (m[6] * r + m[7] * g + m[8] * b) * matrix.c_scale * 255.0 + matrix.c_bias;
344                let (y, cb, cr) = clamp3(y_raw, cb_raw, cr_raw);
345                d[0] = y;
346                d[1] = cb;
347                d[2] = cr;
348                d[3] = s[3];
349                let needs_clamp = y != y_raw.round() as u8
350                    || cb != cb_raw.round() as u8
351                    || cr != cr_raw.round() as u8;
352                if needs_clamp {
353                    clamped.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
354                }
355            });
356        Ok(BatchConvertStats {
357            pixels_processed: pixels as u64,
358            clamped_count: clamped.load(std::sync::atomic::Ordering::Relaxed),
359        })
360    }
361
362    // ── YUV → RGB (packed) ────────────────────────────────────────────────────
363
364    /// Convert packed YUVA → packed RGBA.
365    ///
366    /// The A channel is passed through unchanged.
367    ///
368    /// # Errors
369    ///
370    /// Returns [`ColorKernelError`] if buffer lengths or dimensions are invalid.
371    pub fn yuv_to_rgb(
372        src: &[u8],
373        dst: &mut [u8],
374        width: u32,
375        height: u32,
376        standard: ColorStandard,
377        range: RangeMode,
378    ) -> Result<BatchConvertStats, ColorKernelError> {
379        Self::validate_rgba(src, width, height)?;
380        let pixels = Self::validate_rgba(dst, width, height)?;
381        let matrix = ConversionMatrix::new(standard, range);
382
383        let clamped = std::sync::atomic::AtomicU64::new(0);
384        src.par_chunks(4)
385            .zip(dst.par_chunks_mut(4))
386            .for_each(|(s, d)| {
387                let y = (s[0] as f32 - matrix.y_input_bias) / (matrix.y_scale * 255.0);
388                let cb = (s[1] as f32 - matrix.c_bias) / (matrix.c_scale * 255.0);
389                let cr = (s[2] as f32 - matrix.c_bias) / (matrix.c_scale * 255.0);
390                let m = &matrix.inv;
391
392                let r_raw = (m[0] * y + m[1] * cb + m[2] * cr) * 255.0;
393                let g_raw = (m[3] * y + m[4] * cb + m[5] * cr) * 255.0;
394                let b_raw = (m[6] * y + m[7] * cb + m[8] * cr) * 255.0;
395
396                let (r_c, g_c, b_c) = clamp3(r_raw, g_raw, b_raw);
397                d[0] = r_c;
398                d[1] = g_c;
399                d[2] = b_c;
400                d[3] = s[3]; // alpha
401
402                let needs_clamp = r_c != r_raw.round().clamp(0.0, 255.0) as u8
403                    || g_c != g_raw.round().clamp(0.0, 255.0) as u8
404                    || b_c != b_raw.round().clamp(0.0, 255.0) as u8;
405                if needs_clamp {
406                    clamped.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
407                }
408            });
409
410        Ok(BatchConvertStats {
411            pixels_processed: pixels as u64,
412            clamped_count: clamped.load(std::sync::atomic::Ordering::Relaxed),
413        })
414    }
415
416    /// Instance method variant of [`yuv_to_rgb`].
417    ///
418    /// Uses the pre-built cached matrix rather than constructing a new one.
419    ///
420    /// [`yuv_to_rgb`]: ColorConvertKernel::yuv_to_rgb
421    pub fn convert_yuv_to_rgb(
422        &self,
423        src: &[u8],
424        dst: &mut [u8],
425        width: u32,
426        height: u32,
427    ) -> Result<BatchConvertStats, ColorKernelError> {
428        Self::validate_rgba(src, width, height)?;
429        let pixels = Self::validate_rgba(dst, width, height)?;
430        let matrix = &self.matrix;
431        let clamped = std::sync::atomic::AtomicU64::new(0);
432        src.par_chunks(4)
433            .zip(dst.par_chunks_mut(4))
434            .for_each(|(s, d)| {
435                let y = (s[0] as f32 - matrix.y_input_bias) / (matrix.y_scale * 255.0);
436                let cb = (s[1] as f32 - matrix.c_bias) / (matrix.c_scale * 255.0);
437                let cr = (s[2] as f32 - matrix.c_bias) / (matrix.c_scale * 255.0);
438                let m = &matrix.inv;
439                let r_raw = (m[0] * y + m[1] * cb + m[2] * cr) * 255.0;
440                let g_raw = (m[3] * y + m[4] * cb + m[5] * cr) * 255.0;
441                let b_raw = (m[6] * y + m[7] * cb + m[8] * cr) * 255.0;
442                let (r, g, b) = clamp3(r_raw, g_raw, b_raw);
443                d[0] = r;
444                d[1] = g;
445                d[2] = b;
446                d[3] = s[3];
447                let needs_clamp = r != r_raw.round().clamp(0.0, 255.0) as u8
448                    || g != g_raw.round().clamp(0.0, 255.0) as u8
449                    || b != b_raw.round().clamp(0.0, 255.0) as u8;
450                if needs_clamp {
451                    clamped.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
452                }
453            });
454        Ok(BatchConvertStats {
455            pixels_processed: pixels as u64,
456            clamped_count: clamped.load(std::sync::atomic::Ordering::Relaxed),
457        })
458    }
459
460    // ── Limited ↔ Full range expansion / compression ──────────────────────────
461
462    /// Expand a limited-range (studio swing) packed RGBA buffer to full range.
463    ///
464    /// Y channel: `[16, 235]` → `[0, 255]`.
465    /// Cb/Cr channels: `[16, 240]` → `[0, 255]`.
466    ///
467    /// # Errors
468    ///
469    /// Returns [`ColorKernelError`] on dimension or size mismatch.
470    pub fn expand_limited_to_full(
471        src: &[u8],
472        dst: &mut [u8],
473        width: u32,
474        height: u32,
475    ) -> Result<BatchConvertStats, ColorKernelError> {
476        Self::validate_rgba(src, width, height)?;
477        let pixels = Self::validate_rgba(dst, width, height)?;
478
479        src.par_chunks(4)
480            .zip(dst.par_chunks_mut(4))
481            .for_each(|(s, d)| {
482                // Y channel: limited [16..235] → full [0..255]
483                let y = ((s[0] as f32 - 16.0) * 255.0 / 219.0)
484                    .round()
485                    .clamp(0.0, 255.0) as u8;
486                // Cb channel: limited [16..240] → full [0..255]
487                let cb = ((s[1] as f32 - 128.0) * 255.0 / 224.0 + 128.0)
488                    .round()
489                    .clamp(0.0, 255.0) as u8;
490                // Cr channel
491                let cr = ((s[2] as f32 - 128.0) * 255.0 / 224.0 + 128.0)
492                    .round()
493                    .clamp(0.0, 255.0) as u8;
494                d[0] = y;
495                d[1] = cb;
496                d[2] = cr;
497                d[3] = s[3];
498            });
499
500        Ok(BatchConvertStats {
501            pixels_processed: pixels as u64,
502            clamped_count: 0,
503        })
504    }
505
506    /// Compress a full-range packed YUVA buffer to limited (studio swing) range.
507    ///
508    /// Y channel: `[0, 255]` → `[16, 235]`.
509    /// Cb/Cr channels: `[0, 255]` → `[16, 240]`.
510    ///
511    /// # Errors
512    ///
513    /// Returns [`ColorKernelError`] on dimension or size mismatch.
514    pub fn compress_full_to_limited(
515        src: &[u8],
516        dst: &mut [u8],
517        width: u32,
518        height: u32,
519    ) -> Result<BatchConvertStats, ColorKernelError> {
520        Self::validate_rgba(src, width, height)?;
521        let pixels = Self::validate_rgba(dst, width, height)?;
522
523        src.par_chunks(4)
524            .zip(dst.par_chunks_mut(4))
525            .for_each(|(s, d)| {
526                let y = (s[0] as f32 * 219.0 / 255.0 + 16.0)
527                    .round()
528                    .clamp(16.0, 235.0) as u8;
529                let cb = ((s[1] as f32 - 128.0) * 224.0 / 255.0 + 128.0)
530                    .round()
531                    .clamp(16.0, 240.0) as u8;
532                let cr = ((s[2] as f32 - 128.0) * 224.0 / 255.0 + 128.0)
533                    .round()
534                    .clamp(16.0, 240.0) as u8;
535                d[0] = y;
536                d[1] = cb;
537                d[2] = cr;
538                d[3] = s[3];
539            });
540
541        Ok(BatchConvertStats {
542            pixels_processed: pixels as u64,
543            clamped_count: 0,
544        })
545    }
546
547    // ── Packed → Planar (4:4:4) ───────────────────────────────────────────────
548
549    /// Convert packed RGBA to planar YUV 4:4:4 (separate Y, Cb, Cr planes).
550    ///
551    /// Returns `(Y_plane, Cb_plane, Cr_plane)`, each `width * height` bytes.
552    ///
553    /// # Errors
554    ///
555    /// Returns [`ColorKernelError`] on dimension or size mismatch.
556    pub fn rgba_to_planar_yuv444(
557        src: &[u8],
558        width: u32,
559        height: u32,
560        standard: ColorStandard,
561        range: RangeMode,
562    ) -> Result<(Vec<u8>, Vec<u8>, Vec<u8>), ColorKernelError> {
563        let pixels = Self::validate_rgba(src, width, height)?;
564        let matrix = ConversionMatrix::new(standard, range);
565
566        let mut y_plane = vec![0u8; pixels];
567        let mut cb_plane = vec![0u8; pixels];
568        let mut cr_plane = vec![0u8; pixels];
569
570        // Parallel computation of each pixel's YCbCr.
571        let results: Vec<(u8, u8, u8)> = src
572            .par_chunks(4)
573            .map(|s| {
574                let r = s[0] as f32 / 255.0;
575                let g = s[1] as f32 / 255.0;
576                let b = s[2] as f32 / 255.0;
577                let m = &matrix.fwd;
578                let y_raw =
579                    (m[0] * r + m[1] * g + m[2] * b) * matrix.y_scale * 255.0 + matrix.y_bias;
580                let cb_raw =
581                    (m[3] * r + m[4] * g + m[5] * b) * matrix.c_scale * 255.0 + matrix.c_bias;
582                let cr_raw =
583                    (m[6] * r + m[7] * g + m[8] * b) * matrix.c_scale * 255.0 + matrix.c_bias;
584                let (y, cb, cr) = clamp3(y_raw, cb_raw, cr_raw);
585                (y, cb, cr)
586            })
587            .collect();
588
589        for (i, (y, cb, cr)) in results.into_iter().enumerate() {
590            y_plane[i] = y;
591            cb_plane[i] = cb;
592            cr_plane[i] = cr;
593        }
594
595        Ok((y_plane, cb_plane, cr_plane))
596    }
597
598    /// Convert planar YUV 4:4:4 to packed RGBA.
599    ///
600    /// Alpha channel is set to 255.
601    ///
602    /// # Errors
603    ///
604    /// Returns [`ColorKernelError`] if any plane size or dimensions are invalid.
605    pub fn planar_yuv444_to_rgba(
606        y_plane: &[u8],
607        cb_plane: &[u8],
608        cr_plane: &[u8],
609        width: u32,
610        height: u32,
611        standard: ColorStandard,
612        range: RangeMode,
613    ) -> Result<Vec<u8>, ColorKernelError> {
614        if width == 0 || height == 0 {
615            return Err(ColorKernelError::InvalidDimensions { width, height });
616        }
617        let pixels = (width as usize)
618            .checked_mul(height as usize)
619            .ok_or(ColorKernelError::PixelCountOverflow { width, height })?;
620        for (plane, name) in [y_plane, cb_plane, cr_plane].iter().zip(["Y", "Cb", "Cr"]) {
621            if plane.len() != pixels {
622                return Err(ColorKernelError::BufferSizeMismatch {
623                    expected: pixels,
624                    actual: plane.len(),
625                });
626            }
627            let _ = name;
628        }
629
630        let matrix = ConversionMatrix::new(standard, range);
631        let rgba: Vec<u8> = y_plane
632            .par_iter()
633            .zip(cb_plane.par_iter())
634            .zip(cr_plane.par_iter())
635            .flat_map(|((&y, &cb), &cr)| {
636                let yn = (y as f32 - matrix.y_input_bias) / (matrix.y_scale * 255.0);
637                let cbn = (cb as f32 - matrix.c_bias) / (matrix.c_scale * 255.0);
638                let crn = (cr as f32 - matrix.c_bias) / (matrix.c_scale * 255.0);
639                let m = &matrix.inv;
640                let r_raw = (m[0] * yn + m[1] * cbn + m[2] * crn) * 255.0;
641                let g_raw = (m[3] * yn + m[4] * cbn + m[5] * crn) * 255.0;
642                let b_raw = (m[6] * yn + m[7] * cbn + m[8] * crn) * 255.0;
643                let (r, g, b) = clamp3(r_raw, g_raw, b_raw);
644                [r, g, b, 255u8]
645            })
646            .collect();
647
648        Ok(rgba)
649    }
650}
651
652// ─── Private helpers ──────────────────────────────────────────────────────────
653
/// Round each of three f32 channel values to the nearest integer, limit it
/// to `[0.0, 255.0]`, and narrow it to `u8`.
#[inline]
fn clamp3(a: f32, b: f32, c: f32) -> (u8, u8, u8) {
    let quantize = |value: f32| -> u8 {
        let rounded = value.round();
        rounded.clamp(0.0, 255.0) as u8
    };
    (quantize(a), quantize(b), quantize(c))
}
663
664// ─── Tests ───────────────────────────────────────────────────────────────────
665
666#[cfg(test)]
667mod tests {
668    use super::*;
669
670    // ── ColorStandard ─────────────────────────────────────────────────────────
671
672    #[test]
673    fn test_color_standard_kr_kb_bt601() {
674        let (kr, kb) = ColorStandard::Bt601.kr_kb();
675        assert!((kr - 0.299).abs() < 1e-6);
676        assert!((kb - 0.114).abs() < 1e-6);
677    }
678
679    #[test]
680    fn test_color_standard_kr_kb_bt709() {
681        let (kr, kb) = ColorStandard::Bt709.kr_kb();
682        assert!((kr - 0.2126).abs() < 1e-6);
683        assert!((kb - 0.0722).abs() < 1e-6);
684    }
685
686    #[test]
687    fn test_color_standard_kr_kb_bt2020() {
688        let (kr, kb) = ColorStandard::Bt2020.kr_kb();
689        assert!((kr - 0.2627).abs() < 1e-6);
690        assert!((kb - 0.0593).abs() < 1e-6);
691    }
692
693    #[test]
694    fn test_color_standard_kg_sums_to_one() {
695        for std in [
696            ColorStandard::Bt601,
697            ColorStandard::Bt709,
698            ColorStandard::Bt2020,
699        ] {
700            let (kr, kb) = std.kr_kb();
701            let kg = 1.0 - kr - kb;
702            assert!(
703                (kr + kg + kb - 1.0).abs() < 1e-5,
704                "{}: kr+kg+kb != 1",
705                std.label()
706            );
707        }
708    }
709
710    // ── ConversionMatrix ──────────────────────────────────────────────────────
711
712    #[test]
713    fn test_conversion_matrix_full_range_bias() {
714        let m = ConversionMatrix::new(ColorStandard::Bt709, RangeMode::Full);
715        assert_eq!(m.y_bias, 0.0);
716        assert_eq!(m.c_bias, 128.0);
717        assert!((m.y_scale - 1.0).abs() < 1e-6);
718        assert!((m.c_scale - 1.0).abs() < 1e-6);
719    }
720
721    #[test]
722    fn test_conversion_matrix_limited_range_bias() {
723        let m = ConversionMatrix::new(ColorStandard::Bt709, RangeMode::Limited);
724        assert_eq!(m.y_bias, 16.0);
725        assert_eq!(m.c_bias, 128.0);
726        assert!((m.y_scale - 219.0 / 255.0).abs() < 1e-6);
727        assert!((m.c_scale - 224.0 / 255.0).abs() < 1e-6);
728    }
729
730    // ── rgb_to_yuv / yuv_to_rgb round-trip ───────────────────────────────────
731
732    fn make_rgba_pixel(r: u8, g: u8, b: u8) -> Vec<u8> {
733        vec![r, g, b, 255]
734    }
735
736    fn roundtrip_pixel(
737        r: u8,
738        g: u8,
739        b: u8,
740        standard: ColorStandard,
741        range: RangeMode,
742        tolerance: i16,
743    ) {
744        let src = make_rgba_pixel(r, g, b);
745        let mut yuv = vec![0u8; 4];
746        ColorConvertKernel::rgb_to_yuv(&src, &mut yuv, 1, 1, standard, range).unwrap();
747        let mut rgb_back = vec![0u8; 4];
748        ColorConvertKernel::yuv_to_rgb(&yuv, &mut rgb_back, 1, 1, standard, range).unwrap();
749        for (i, (&orig, &back)) in src.iter().zip(rgb_back.iter()).enumerate().take(3) {
750            let diff = (orig as i16 - back as i16).abs();
751            assert!(
752                diff <= tolerance,
753                "channel {i}: orig={orig} back={back} diff={diff} > tol={tolerance} (std={}, range={:?})",
754                standard.label(), range
755            );
756        }
757        // Alpha must be preserved exactly
758        assert_eq!(rgb_back[3], 255);
759    }
760
761    #[test]
762    fn test_roundtrip_white_bt709_full() {
763        roundtrip_pixel(255, 255, 255, ColorStandard::Bt709, RangeMode::Full, 2);
764    }
765
766    #[test]
767    fn test_roundtrip_black_bt709_full() {
768        roundtrip_pixel(0, 0, 0, ColorStandard::Bt709, RangeMode::Full, 2);
769    }
770
771    #[test]
772    fn test_roundtrip_red_bt709_full() {
773        roundtrip_pixel(255, 0, 0, ColorStandard::Bt709, RangeMode::Full, 2);
774    }
775
776    #[test]
777    fn test_roundtrip_green_bt709_full() {
778        roundtrip_pixel(0, 255, 0, ColorStandard::Bt709, RangeMode::Full, 2);
779    }
780
781    #[test]
782    fn test_roundtrip_blue_bt709_full() {
783        roundtrip_pixel(0, 0, 255, ColorStandard::Bt709, RangeMode::Full, 2);
784    }
785
786    #[test]
787    fn test_roundtrip_gray_bt709_full() {
788        roundtrip_pixel(128, 128, 128, ColorStandard::Bt709, RangeMode::Full, 2);
789    }
790
791    #[test]
792    fn test_roundtrip_bt601_full() {
793        roundtrip_pixel(180, 90, 60, ColorStandard::Bt601, RangeMode::Full, 2);
794    }
795
796    #[test]
797    fn test_roundtrip_bt2020_full() {
798        roundtrip_pixel(100, 200, 150, ColorStandard::Bt2020, RangeMode::Full, 2);
799    }
800
801    #[test]
802    fn test_roundtrip_bt709_limited() {
803        // Limited-range round-trip has ~2 LSB precision loss.
804        roundtrip_pixel(200, 100, 50, ColorStandard::Bt709, RangeMode::Limited, 3);
805    }
806
807    // ── Error handling ────────────────────────────────────────────────────────
808
809    #[test]
810    fn test_rgb_to_yuv_zero_dimensions() {
811        let src = vec![0u8; 4];
812        let mut dst = vec![0u8; 4];
813        let err = ColorConvertKernel::rgb_to_yuv(
814            &src,
815            &mut dst,
816            0,
817            1,
818            ColorStandard::Bt709,
819            RangeMode::Full,
820        );
821        assert!(matches!(
822            err,
823            Err(ColorKernelError::InvalidDimensions { .. })
824        ));
825    }
826
827    #[test]
828    fn test_rgb_to_yuv_buffer_mismatch() {
829        let src = vec![0u8; 4];
830        let mut dst = vec![0u8; 8]; // wrong size
831        let err = ColorConvertKernel::rgb_to_yuv(
832            &src,
833            &mut dst,
834            1,
835            1,
836            ColorStandard::Bt709,
837            RangeMode::Full,
838        );
839        assert!(matches!(
840            err,
841            Err(ColorKernelError::BufferSizeMismatch { .. })
842        ));
843    }
844
845    // ── stats ─────────────────────────────────────────────────────────────────
846
/// The stats returned by a 4×4 conversion must report 16 processed pixels.
#[test]
fn test_stats_pixels_processed() {
    const WIDTH: u32 = 4;
    const HEIGHT: u32 = 4;
    // Four bytes per pixel, 16 pixels in total.
    const BYTE_LEN: usize = 4 * (WIDTH * HEIGHT) as usize;

    let input = [128u8; BYTE_LEN];
    let mut output = [0u8; BYTE_LEN];
    let result = ColorConvertKernel::rgb_to_yuv(
        &input,
        &mut output,
        WIDTH,
        HEIGHT,
        ColorStandard::Bt709,
        RangeMode::Full,
    );
    let stats = result.unwrap();
    assert_eq!(stats.pixels_processed, 16);
}
862
863    // ── Limited ↔ Full range ──────────────────────────────────────────────────
864
/// Expanding limited-range white (Y=235) must yield full-range white (Y=255).
#[test]
fn test_limited_to_full_y_white() {
    // Single sample: Y=235, Cb=128, Cr=128, followed by a fourth byte of 255.
    let limited = [235u8, 128, 128, 255];
    let mut full = [0u8; 4];
    ColorConvertKernel::expand_limited_to_full(&limited, &mut full, 1, 1).unwrap();
    let luma = full[0];
    assert_eq!(luma, 255, "limited Y=235 should map to full Y=255");
}
873
/// Expanding limited-range black (Y=16) must yield full-range black (Y=0).
#[test]
fn test_limited_to_full_y_black() {
    let limited = [16u8, 128, 128, 255];
    let mut full = [0u8; 4];
    ColorConvertKernel::expand_limited_to_full(&limited, &mut full, 1, 1).unwrap();
    let luma = full[0];
    assert_eq!(luma, 0, "limited Y=16 should map to full Y=0");
}
882
/// Compressing full-range white (Y=255) must yield limited-range white (Y=235).
#[test]
fn test_compress_full_to_limited_white() {
    let full = [255u8, 128, 128, 255];
    let mut limited = [0u8; 4];
    ColorConvertKernel::compress_full_to_limited(&full, &mut limited, 1, 1).unwrap();
    let luma = limited[0];
    assert_eq!(luma, 235, "full Y=255 should compress to limited Y=235");
}
891
/// Compressing to limited range and expanding back must reproduce the input
/// within 2 on each of the first three channels.
#[test]
fn test_compress_and_expand_roundtrip() {
    let original = [128u8, 128, 128, 255];

    let mut limited = [0u8; 4];
    ColorConvertKernel::compress_full_to_limited(&original, &mut limited, 1, 1).unwrap();

    let mut restored = [0u8; 4];
    ColorConvertKernel::expand_limited_to_full(&limited, &mut restored, 1, 1).unwrap();

    // The fourth byte is intentionally left out of the comparison.
    let channel_pairs = original.iter().zip(restored.iter());
    for (i, (&before, &after)) in channel_pairs.enumerate().take(3) {
        let diff = (i16::from(before) - i16::from(after)).abs();
        assert!(diff <= 2, "channel {i}: diff={diff}");
    }
}
904
905    // ── Planar YUV 4:4:4 ─────────────────────────────────────────────────────
906
/// Converting a 4×4 RGBA image must produce three planes of 16 samples each.
#[test]
fn test_rgba_to_planar_yuv444_size() {
    // 4×4 image, four bytes per pixel.
    let rgba = [128u8; 4 * 4 * 4];
    let planes = ColorConvertKernel::rgba_to_planar_yuv444(
        &rgba,
        4,
        4,
        ColorStandard::Bt709,
        RangeMode::Full,
    );
    let (y_plane, cb_plane, cr_plane) = planes.unwrap();
    assert_eq!(y_plane.len(), 16);
    assert_eq!(cb_plane.len(), 16);
    assert_eq!(cr_plane.len(), 16);
}
922
/// Converts a 4×4 RGBA pattern to planar YUV 4:4:4 and back, then checks that
/// the output has the input's length and that the first three pixels match the
/// input within 3 per colour channel.
#[test]
fn test_planar_yuv444_roundtrip() {
    // 64 bytes of a repeating pattern: byte i holds (i * 17) mod 256.
    let src: Vec<u8> = (0..64).map(|i: u32| ((i * 17) % 256) as u8).collect();

    let planes = ColorConvertKernel::rgba_to_planar_yuv444(
        &src,
        4,
        4,
        ColorStandard::Bt709,
        RangeMode::Full,
    );
    let (y, cb, cr) = planes.unwrap();

    let rgba = ColorConvertKernel::planar_yuv444_to_rgba(
        &y,
        &cb,
        &cr,
        4,
        4,
        ColorStandard::Bt709,
        RangeMode::Full,
    )
    .unwrap();
    assert_eq!(rgba.len(), 4 * 4 * 4);

    // Walk both buffers one 4-byte pixel at a time; only the first three
    // pixels are compared, and the fourth byte of each pixel is not checked.
    let pixel_pairs = src.chunks_exact(4).zip(rgba.chunks_exact(4));
    for (pixel, (expected, actual)) in pixel_pairs.enumerate().take(3) {
        let dr = (i16::from(expected[0]) - i16::from(actual[0])).abs();
        let dg = (i16::from(expected[1]) - i16::from(actual[1])).abs();
        let db = (i16::from(expected[2]) - i16::from(actual[2])).abs();
        assert!(dr <= 3, "R channel diff={dr} at pixel {}", pixel);
        assert!(dg <= 3, "G channel diff={dg} at pixel {}", pixel);
        assert!(db <= 3, "B channel diff={db} at pixel {}", pixel);
    }
}
954
955    // ── Instance methods ──────────────────────────────────────────────────────
956
/// A kernel instance must report the standard and range it was constructed with.
#[test]
fn test_kernel_instance_standard_and_range() {
    let kernel = ColorConvertKernel::new(ColorStandard::Bt2020, RangeMode::Limited);

    let reported_standard = kernel.standard();
    assert_eq!(reported_standard, ColorStandard::Bt2020);

    let reported_range = kernel.range();
    assert_eq!(reported_range, RangeMode::Limited);
}
963
/// The instance-level conversion must report one processed pixel for a 1×1
/// input and leave the alpha byte at 255.
#[test]
fn test_kernel_instance_convert_rgb_to_yuv() {
    let kernel = ColorConvertKernel::new(ColorStandard::Bt709, RangeMode::Full);
    let pixel = [100u8, 150, 200, 255];
    let mut converted = [0u8; 4];

    let outcome = kernel.convert_rgb_to_yuv(&pixel, &mut converted, 1, 1);
    let stats = outcome.unwrap();

    assert_eq!(stats.pixels_processed, 1);
    assert_eq!(converted[3], 255); // alpha preserved
}
973
/// Runs an 8×8 image through RGB→YUV→RGB and checks that every pixel's fourth
/// (alpha) byte comes back unchanged.
#[test]
fn test_multi_pixel_batch() {
    let (w, h) = (8u32, 8u32);
    let byte_len = (w * h * 4) as usize;

    // Byte i of the input holds i mod 256.
    let src: Vec<u8> = (0..byte_len).map(|i| (i % 256) as u8).collect();
    let mut yuv = vec![0u8; byte_len];
    let mut rgb_back = vec![0u8; byte_len];

    let forward = ColorConvertKernel::rgb_to_yuv(
        &src,
        &mut yuv,
        w,
        h,
        ColorStandard::Bt709,
        RangeMode::Full,
    );
    forward.unwrap();

    let backward = ColorConvertKernel::yuv_to_rgb(
        &yuv,
        &mut rgb_back,
        w,
        h,
        ColorStandard::Bt709,
        RangeMode::Full,
    );
    backward.unwrap();

    // Alpha channel must be preserved: compare every fourth byte starting at index 3.
    let restored_alpha = rgb_back.iter().skip(3).step_by(4);
    let original_alpha = src.iter().skip(3).step_by(4);
    for (restored, original) in restored_alpha.zip(original_alpha) {
        assert_eq!(*restored, *original);
    }
}
997}