Skip to main content

zenpixels_convert/
convert.rs

//! Row-level pixel conversion kernels.
//!
//! Each kernel converts one row of `width` pixels from a source format to
//! a destination format. The individual kernels are pure functions that do
//! not allocate; `convert_row` allocates scratch rows only when a plan has
//! more than one step.
5
6use alloc::vec;
7use alloc::vec::Vec;
8use core::cmp::min;
9
10use crate::policy::{AlphaPolicy, ConvertOptions, DepthPolicy};
11use crate::{
12    AlphaMode, ChannelLayout, ChannelType, ColorPrimaries, ConvertError, PixelDescriptor,
13    TransferFunction,
14};
15
16/// Pre-computed conversion plan.
17///
18/// Stores the chain of steps needed to convert from one format to another.
19/// Created once, applied to every row.
20#[derive(Clone, Debug)]
21pub struct ConvertPlan {
22    pub(crate) from: PixelDescriptor,
23    pub(crate) to: PixelDescriptor,
24    pub(crate) steps: Vec<ConvertStep>,
25}
26
/// One primitive operation in a conversion chain.
///
/// Steps fall into four families: channel-layout changes, depth / transfer
/// function changes, alpha-mode changes and Oklab color-model changes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ConvertStep {
    // --- Layout -----------------------------------------------------------
    /// Identity: the row is copied unchanged.
    Identity,
    /// Byte swizzle between BGRA and RGBA (the same step serves both directions).
    SwizzleBgraRgba,
    /// 3 channels → 4 channels: append an opaque alpha channel.
    AddAlpha,
    /// 4 channels → 3 channels: discard the alpha channel.
    DropAlpha,
    /// Gray → RGB: the gray value is replicated into all three channels.
    GrayToRgb,
    /// Gray → RGBA: replicated gray plus opaque alpha.
    GrayToRgba,
    /// RGB → Gray using BT.709 luma.
    RgbToGray,
    /// RGBA → Gray using BT.709 luma; alpha is discarded.
    RgbaToGray,
    /// GrayAlpha → RGBA: gray replicated, alpha kept.
    GrayAlphaToRgba,
    /// GrayAlpha → RGB: gray replicated, alpha discarded.
    GrayAlphaToRgb,
    /// Gray → GrayAlpha: append an opaque alpha channel.
    GrayToGrayAlpha,
    /// GrayAlpha → Gray: discard the alpha channel.
    GrayAlphaToGray,

    // --- Depth / transfer function ----------------------------------------
    /// sRGB-encoded u8 → linear f32 (EOTF applied per channel).
    SrgbU8ToLinearF32,
    /// Linear f32 → sRGB-encoded u8 (OETF applied per channel).
    LinearF32ToSrgbU8,
    /// u8 → f32 without gamma handling (`v / 255.0`).
    NaiveU8ToF32,
    /// f32 → u8 without gamma handling (`clamp * 255 + 0.5`).
    NaiveF32ToU8,
    /// u16 → u8 (`(v * 255 + 32768) >> 16`).
    U16ToU8,
    /// u8 → u16 (`v * 257`).
    U8ToU16,
    /// u16 → f32 (`v / 65535.0`).
    U16ToF32,
    /// f32 → u16 (`clamp * 65535 + 0.5`).
    F32ToU16,
    /// PQ (SMPTE ST 2084) u16 → linear f32 (EOTF).
    PqU16ToLinearF32,
    /// Linear f32 → PQ u16 (inverse EOTF / OETF).
    LinearF32ToPqU16,
    /// PQ f32 in [0,1] → linear f32 (EOTF; depth unchanged).
    PqF32ToLinearF32,
    /// Linear f32 → PQ f32 in [0,1] (OETF; depth unchanged).
    LinearF32ToPqF32,
    /// HLG (ARIB STD-B67) u16 → linear f32 (EOTF).
    HlgU16ToLinearF32,
    /// Linear f32 → HLG u16 (OETF).
    LinearF32ToHlgU16,
    /// HLG f32 in [0,1] → linear f32 (EOTF; depth unchanged).
    HlgF32ToLinearF32,
    /// Linear f32 → HLG f32 in [0,1] (OETF; depth unchanged).
    LinearF32ToHlgF32,

    // --- Alpha mode -------------------------------------------------------
    /// Straight alpha → premultiplied alpha.
    StraightToPremul,
    /// Premultiplied alpha → straight alpha.
    PremulToStraight,

    // --- Color model ------------------------------------------------------
    /// Linear RGB f32 → Oklab f32 (3 channels).
    LinearRgbToOklab,
    /// Oklab f32 → linear RGB f32 (3 channels).
    OklabToLinearRgb,
    /// Linear RGBA f32 → Oklaba f32 (4 channels, alpha preserved).
    LinearRgbaToOklaba,
    /// Oklaba f32 → linear RGBA f32 (4 channels, alpha preserved).
    OklabaToLinearRgba,
}
99
100impl ConvertPlan {
101    /// Create a conversion plan from `from` to `to`.
102    ///
103    /// Returns `Err` if no conversion path exists.
104    pub fn new(from: PixelDescriptor, to: PixelDescriptor) -> Result<Self, ConvertError> {
105        if from == to {
106            return Ok(Self {
107                from,
108                to,
109                steps: vec![ConvertStep::Identity],
110            });
111        }
112
113        let mut steps = Vec::with_capacity(3);
114
115        // Step 1: Layout conversion (within same depth class).
116        // Step 2: Depth conversion.
117        // Step 3: Alpha mode conversion.
118        //
119        // For cross-depth conversions, we convert layout at the source depth
120        // first, then change depth. This minimizes the number of channels
121        // we need to depth-convert.
122
123        let need_depth_change = from.channel_type() != to.channel_type();
124        let need_layout_change = from.layout() != to.layout();
125        let need_alpha_change =
126            from.alpha() != to.alpha() && from.alpha().is_some() && to.alpha().is_some();
127
128        // Depth/TF steps are needed when depth changes, or when both are F32
129        // and transfer functions differ.
130        let need_depth_or_tf = need_depth_change
131            || (from.channel_type() == ChannelType::F32 && from.transfer() != to.transfer());
132
133        // If we need to change depth AND layout, plan the optimal order.
134        if need_layout_change {
135            // When going to fewer channels, convert layout first (less depth work).
136            // When going to more channels, convert depth first (less layout work).
137            //
138            // Exception: Oklab layout steps require f32 data. When the source
139            // is integer (U8/U16) and the layout change involves Oklab, we must
140            // convert depth first regardless of channel count.
141            let src_ch = from.layout().channels();
142            let dst_ch = to.layout().channels();
143            let involves_oklab =
144                matches!(from.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA)
145                    || matches!(to.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA);
146
147            // Oklab conversion requires known primaries for the RGB→LMS matrix.
148            if involves_oklab && from.primaries == ColorPrimaries::Unknown {
149                return Err(ConvertError::NoPath { from, to });
150            }
151
152            let depth_first = need_depth_or_tf
153                && (dst_ch > src_ch || (involves_oklab && from.channel_type() != ChannelType::F32));
154
155            if depth_first {
156                // Depth first, then layout.
157                steps.extend(depth_steps(
158                    from.channel_type(),
159                    to.channel_type(),
160                    from.transfer(),
161                    to.transfer(),
162                )?);
163                steps.extend(layout_steps(from.layout(), to.layout()));
164            } else {
165                // Layout first, then depth.
166                steps.extend(layout_steps(from.layout(), to.layout()));
167                if need_depth_or_tf {
168                    steps.extend(depth_steps(
169                        from.channel_type(),
170                        to.channel_type(),
171                        from.transfer(),
172                        to.transfer(),
173                    )?);
174                }
175            }
176        } else if need_depth_or_tf {
177            steps.extend(depth_steps(
178                from.channel_type(),
179                to.channel_type(),
180                from.transfer(),
181                to.transfer(),
182            )?);
183        }
184
185        // Alpha mode conversion (if both have alpha and modes differ).
186        if need_alpha_change {
187            match (from.alpha(), to.alpha()) {
188                (Some(AlphaMode::Straight), Some(AlphaMode::Premultiplied)) => {
189                    steps.push(ConvertStep::StraightToPremul);
190                }
191                (Some(AlphaMode::Premultiplied), Some(AlphaMode::Straight)) => {
192                    steps.push(ConvertStep::PremulToStraight);
193                }
194                _ => {}
195            }
196        }
197
198        if steps.is_empty() {
199            // Transfer-only difference or alpha-mode-only: identity path.
200            steps.push(ConvertStep::Identity);
201        }
202
203        Ok(Self { from, to, steps })
204    }
205
206    /// Create a conversion plan with explicit policy enforcement.
207    ///
208    /// Validates that the planned conversion steps are allowed by the given
209    /// policies before creating the plan. Returns an error if a forbidden
210    /// operation would be required.
211    pub fn new_explicit(
212        from: PixelDescriptor,
213        to: PixelDescriptor,
214        options: &ConvertOptions,
215    ) -> Result<Self, ConvertError> {
216        // Check alpha removal policy.
217        let drops_alpha = from.alpha().is_some() && to.alpha().is_none();
218        if drops_alpha && options.alpha_policy == AlphaPolicy::Forbid {
219            return Err(ConvertError::AlphaRemovalForbidden);
220        }
221
222        // Check depth reduction policy.
223        let reduces_depth = from.channel_type().byte_size() > to.channel_type().byte_size();
224        if reduces_depth && options.depth_policy == DepthPolicy::Forbid {
225            return Err(ConvertError::DepthReductionForbidden);
226        }
227
228        // Check RGB→Gray requires luma coefficients.
229        let src_is_rgb = matches!(
230            from.layout(),
231            ChannelLayout::Rgb | ChannelLayout::Rgba | ChannelLayout::Bgra
232        );
233        let dst_is_gray = matches!(to.layout(), ChannelLayout::Gray | ChannelLayout::GrayAlpha);
234        if src_is_rgb && dst_is_gray && options.luma.is_none() {
235            return Err(ConvertError::RgbToGray);
236        }
237
238        Self::new(from, to)
239    }
240
241    /// True if conversion is a no-op.
242    #[must_use]
243    pub fn is_identity(&self) -> bool {
244        self.steps.len() == 1 && self.steps[0] == ConvertStep::Identity
245    }
246
247    /// Source descriptor.
248    pub fn from(&self) -> PixelDescriptor {
249        self.from
250    }
251
252    /// Target descriptor.
253    pub fn to(&self) -> PixelDescriptor {
254        self.to
255    }
256}
257
258/// Determine the layout conversion step(s).
259///
260/// Some layout conversions require two steps (e.g., BGRA -> RGB needs
261/// swizzle + drop alpha). Returns up to 2 steps.
262fn layout_steps(from: ChannelLayout, to: ChannelLayout) -> Vec<ConvertStep> {
263    if from == to {
264        return Vec::new();
265    }
266    match (from, to) {
267        (ChannelLayout::Bgra, ChannelLayout::Rgba) | (ChannelLayout::Rgba, ChannelLayout::Bgra) => {
268            vec![ConvertStep::SwizzleBgraRgba]
269        }
270        (ChannelLayout::Rgb, ChannelLayout::Rgba) => vec![ConvertStep::AddAlpha],
271        (ChannelLayout::Rgb, ChannelLayout::Bgra) => {
272            // Rgb -> RGBA -> BGRA: add alpha then swizzle.
273            vec![ConvertStep::AddAlpha, ConvertStep::SwizzleBgraRgba]
274        }
275        (ChannelLayout::Rgba, ChannelLayout::Rgb) => vec![ConvertStep::DropAlpha],
276        (ChannelLayout::Bgra, ChannelLayout::Rgb) => {
277            // BGRA -> RGBA -> RGB: swizzle then drop alpha.
278            vec![ConvertStep::SwizzleBgraRgba, ConvertStep::DropAlpha]
279        }
280        (ChannelLayout::Gray, ChannelLayout::Rgb) => vec![ConvertStep::GrayToRgb],
281        (ChannelLayout::Gray, ChannelLayout::Rgba) => vec![ConvertStep::GrayToRgba],
282        (ChannelLayout::Gray, ChannelLayout::Bgra) => {
283            // Gray -> RGBA -> BGRA: expand then swizzle.
284            vec![ConvertStep::GrayToRgba, ConvertStep::SwizzleBgraRgba]
285        }
286        (ChannelLayout::Rgb, ChannelLayout::Gray) => vec![ConvertStep::RgbToGray],
287        (ChannelLayout::Rgba, ChannelLayout::Gray) => vec![ConvertStep::RgbaToGray],
288        (ChannelLayout::Bgra, ChannelLayout::Gray) => {
289            // BGRA -> RGBA -> Gray: swizzle then to gray.
290            vec![ConvertStep::SwizzleBgraRgba, ConvertStep::RgbaToGray]
291        }
292        (ChannelLayout::GrayAlpha, ChannelLayout::Rgba) => vec![ConvertStep::GrayAlphaToRgba],
293        (ChannelLayout::GrayAlpha, ChannelLayout::Bgra) => {
294            // GrayAlpha -> RGBA -> BGRA: expand then swizzle.
295            vec![ConvertStep::GrayAlphaToRgba, ConvertStep::SwizzleBgraRgba]
296        }
297        (ChannelLayout::GrayAlpha, ChannelLayout::Rgb) => vec![ConvertStep::GrayAlphaToRgb],
298        (ChannelLayout::Gray, ChannelLayout::GrayAlpha) => vec![ConvertStep::GrayToGrayAlpha],
299        (ChannelLayout::GrayAlpha, ChannelLayout::Gray) => vec![ConvertStep::GrayAlphaToGray],
300
301        // Oklab ↔ RGB conversions (via linear RGB).
302        (ChannelLayout::Rgb, ChannelLayout::Oklab) => vec![ConvertStep::LinearRgbToOklab],
303        (ChannelLayout::Oklab, ChannelLayout::Rgb) => vec![ConvertStep::OklabToLinearRgb],
304        (ChannelLayout::Rgba, ChannelLayout::OklabA) => vec![ConvertStep::LinearRgbaToOklaba],
305        (ChannelLayout::OklabA, ChannelLayout::Rgba) => vec![ConvertStep::OklabaToLinearRgba],
306
307        // Oklab ↔ RGB with alpha add/drop.
308        (ChannelLayout::Rgb, ChannelLayout::OklabA) => {
309            vec![ConvertStep::AddAlpha, ConvertStep::LinearRgbaToOklaba]
310        }
311        (ChannelLayout::OklabA, ChannelLayout::Rgb) => {
312            vec![ConvertStep::OklabaToLinearRgba, ConvertStep::DropAlpha]
313        }
314        (ChannelLayout::Oklab, ChannelLayout::Rgba) => {
315            vec![ConvertStep::OklabToLinearRgb, ConvertStep::AddAlpha]
316        }
317        (ChannelLayout::Rgba, ChannelLayout::Oklab) => {
318            vec![ConvertStep::DropAlpha, ConvertStep::LinearRgbToOklab]
319        }
320
321        // Oklab ↔ BGRA (swizzle to/from RGBA, then Oklab).
322        (ChannelLayout::Bgra, ChannelLayout::OklabA) => {
323            vec![
324                ConvertStep::SwizzleBgraRgba,
325                ConvertStep::LinearRgbaToOklaba,
326            ]
327        }
328        (ChannelLayout::OklabA, ChannelLayout::Bgra) => {
329            vec![
330                ConvertStep::OklabaToLinearRgba,
331                ConvertStep::SwizzleBgraRgba,
332            ]
333        }
334        (ChannelLayout::Bgra, ChannelLayout::Oklab) => {
335            vec![
336                ConvertStep::SwizzleBgraRgba,
337                ConvertStep::DropAlpha,
338                ConvertStep::LinearRgbToOklab,
339            ]
340        }
341        (ChannelLayout::Oklab, ChannelLayout::Bgra) => {
342            vec![
343                ConvertStep::OklabToLinearRgb,
344                ConvertStep::AddAlpha,
345                ConvertStep::SwizzleBgraRgba,
346            ]
347        }
348
349        // Gray ↔ Oklab (expand gray to RGB first).
350        (ChannelLayout::Gray, ChannelLayout::Oklab) => {
351            vec![ConvertStep::GrayToRgb, ConvertStep::LinearRgbToOklab]
352        }
353        (ChannelLayout::Oklab, ChannelLayout::Gray) => {
354            vec![ConvertStep::OklabToLinearRgb, ConvertStep::RgbToGray]
355        }
356        (ChannelLayout::Gray, ChannelLayout::OklabA) => {
357            vec![ConvertStep::GrayToRgba, ConvertStep::LinearRgbaToOklaba]
358        }
359        (ChannelLayout::OklabA, ChannelLayout::Gray) => {
360            vec![ConvertStep::OklabaToLinearRgba, ConvertStep::RgbaToGray]
361        }
362        (ChannelLayout::GrayAlpha, ChannelLayout::OklabA) => {
363            vec![
364                ConvertStep::GrayAlphaToRgba,
365                ConvertStep::LinearRgbaToOklaba,
366            ]
367        }
368        (ChannelLayout::OklabA, ChannelLayout::GrayAlpha) => {
369            // Drop alpha from OklabA→Oklab, convert to RGB, then to GrayAlpha.
370            // Alpha is lost; this is inherently lossy.
371            vec![
372                ConvertStep::OklabaToLinearRgba,
373                ConvertStep::RgbaToGray,
374                ConvertStep::GrayToGrayAlpha,
375            ]
376        }
377        (ChannelLayout::GrayAlpha, ChannelLayout::Oklab) => {
378            vec![ConvertStep::GrayAlphaToRgb, ConvertStep::LinearRgbToOklab]
379        }
380        (ChannelLayout::Oklab, ChannelLayout::GrayAlpha) => {
381            vec![
382                ConvertStep::OklabToLinearRgb,
383                ConvertStep::RgbToGray,
384                ConvertStep::GrayToGrayAlpha,
385            ]
386        }
387
388        // Oklab ↔ alpha variants.
389        (ChannelLayout::Oklab, ChannelLayout::OklabA) => vec![ConvertStep::AddAlpha],
390        (ChannelLayout::OklabA, ChannelLayout::Oklab) => vec![ConvertStep::DropAlpha],
391
392        _ => Vec::new(), // Unsupported layout conversion.
393    }
394}
395
396/// Determine the depth conversion step(s), considering transfer functions.
397///
398/// Returns one or two steps. Two steps are needed when the conversion
399/// requires going through an intermediate format (e.g. PQ U16 → sRGB U8
400/// goes PQ U16 → Linear F32 → sRGB U8).
401fn depth_steps(
402    from: ChannelType,
403    to: ChannelType,
404    from_tf: TransferFunction,
405    to_tf: TransferFunction,
406) -> Result<Vec<ConvertStep>, ConvertError> {
407    if from == to && from_tf == to_tf {
408        return Ok(Vec::new());
409    }
410
411    // Same depth, different transfer function.
412    // For integer types, TF changes are metadata-only (no math).
413    // For F32, we can apply EOTF/OETF in place.
414    if from == to && from != ChannelType::F32 {
415        return Ok(Vec::new());
416    }
417
418    if from == to && from == ChannelType::F32 {
419        return match (from_tf, to_tf) {
420            (TransferFunction::Pq, TransferFunction::Linear) => {
421                Ok(vec![ConvertStep::PqF32ToLinearF32])
422            }
423            (TransferFunction::Linear, TransferFunction::Pq) => {
424                Ok(vec![ConvertStep::LinearF32ToPqF32])
425            }
426            (TransferFunction::Hlg, TransferFunction::Linear) => {
427                Ok(vec![ConvertStep::HlgF32ToLinearF32])
428            }
429            (TransferFunction::Linear, TransferFunction::Hlg) => {
430                Ok(vec![ConvertStep::LinearF32ToHlgF32])
431            }
432            // PQ ↔ HLG: go through linear.
433            (TransferFunction::Pq, TransferFunction::Hlg) => Ok(vec![
434                ConvertStep::PqF32ToLinearF32,
435                ConvertStep::LinearF32ToHlgF32,
436            ]),
437            (TransferFunction::Hlg, TransferFunction::Pq) => Ok(vec![
438                ConvertStep::HlgF32ToLinearF32,
439                ConvertStep::LinearF32ToPqF32,
440            ]),
441            // sRGB ↔ Linear are already handled.
442            (TransferFunction::Srgb | TransferFunction::Bt709, TransferFunction::Linear)
443            | (TransferFunction::Linear, TransferFunction::Srgb | TransferFunction::Bt709) => {
444                // F32 sRGB ↔ Linear: no dedicated kernel yet, treat as identity.
445                // (The sRGB kernels only handle U8 ↔ F32 transitions.)
446                Ok(Vec::new())
447            }
448            _ => Ok(Vec::new()),
449        };
450    }
451
452    match (from, to) {
453        (ChannelType::U8, ChannelType::F32) => {
454            if (from_tf == TransferFunction::Srgb || from_tf == TransferFunction::Bt709)
455                && to_tf == TransferFunction::Linear
456            {
457                Ok(vec![ConvertStep::SrgbU8ToLinearF32])
458            } else {
459                Ok(vec![ConvertStep::NaiveU8ToF32])
460            }
461        }
462        (ChannelType::F32, ChannelType::U8) => {
463            if from_tf == TransferFunction::Linear
464                && (to_tf == TransferFunction::Srgb || to_tf == TransferFunction::Bt709)
465            {
466                Ok(vec![ConvertStep::LinearF32ToSrgbU8])
467            } else {
468                Ok(vec![ConvertStep::NaiveF32ToU8])
469            }
470        }
471        (ChannelType::U16, ChannelType::F32) => {
472            // PQ/HLG U16 → Linear F32: apply EOTF during conversion.
473            match (from_tf, to_tf) {
474                (TransferFunction::Pq, TransferFunction::Linear) => {
475                    Ok(vec![ConvertStep::PqU16ToLinearF32])
476                }
477                (TransferFunction::Hlg, TransferFunction::Linear) => {
478                    Ok(vec![ConvertStep::HlgU16ToLinearF32])
479                }
480                _ => Ok(vec![ConvertStep::U16ToF32]),
481            }
482        }
483        (ChannelType::F32, ChannelType::U16) => {
484            // Linear F32 → PQ/HLG U16: apply OETF during conversion.
485            match (from_tf, to_tf) {
486                (TransferFunction::Linear, TransferFunction::Pq) => {
487                    Ok(vec![ConvertStep::LinearF32ToPqU16])
488                }
489                (TransferFunction::Linear, TransferFunction::Hlg) => {
490                    Ok(vec![ConvertStep::LinearF32ToHlgU16])
491                }
492                _ => Ok(vec![ConvertStep::F32ToU16]),
493            }
494        }
495        (ChannelType::U16, ChannelType::U8) => {
496            // HDR U16 → SDR U8: go through linear F32 with proper EOTF → OETF.
497            if from_tf == TransferFunction::Pq && to_tf == TransferFunction::Srgb {
498                Ok(vec![
499                    ConvertStep::PqU16ToLinearF32,
500                    ConvertStep::LinearF32ToSrgbU8,
501                ])
502            } else if from_tf == TransferFunction::Hlg && to_tf == TransferFunction::Srgb {
503                Ok(vec![
504                    ConvertStep::HlgU16ToLinearF32,
505                    ConvertStep::LinearF32ToSrgbU8,
506                ])
507            } else {
508                Ok(vec![ConvertStep::U16ToU8])
509            }
510        }
511        (ChannelType::U8, ChannelType::U16) => Ok(vec![ConvertStep::U8ToU16]),
512        _ => Err(ConvertError::NoPath {
513            from: PixelDescriptor::new(from, ChannelLayout::Rgb, None, from_tf),
514            to: PixelDescriptor::new(to, ChannelLayout::Rgb, None, to_tf),
515        }),
516    }
517}
518
519// ---------------------------------------------------------------------------
520// Row conversion kernels
521// ---------------------------------------------------------------------------
522
523/// Convert one row of `width` pixels using a pre-computed plan.
524///
525/// `src` and `dst` must be sized for `width` pixels in their respective formats.
526/// For multi-step plans, an internal scratch buffer is used.
527pub fn convert_row(plan: &ConvertPlan, src: &[u8], dst: &mut [u8], width: u32) {
528    if plan.is_identity() {
529        let len = min(src.len(), dst.len());
530        dst[..len].copy_from_slice(&src[..len]);
531        return;
532    }
533
534    if plan.steps.len() == 1 {
535        apply_step_u8(plan.steps[0], src, dst, width, plan.from, plan.to);
536        return;
537    }
538
539    // Multi-step: use intermediate buffer.
540    // Calculate intermediate format after first step.
541    let mut current = Vec::from(src);
542    let mut current_desc = plan.from;
543
544    for (i, &step) in plan.steps.iter().enumerate() {
545        let is_last = i == plan.steps.len() - 1;
546        let next_desc = if is_last {
547            plan.to
548        } else {
549            intermediate_desc(current_desc, step)
550        };
551
552        let next_bpp = next_desc.bytes_per_pixel();
553        let next_len = (width as usize) * next_bpp;
554
555        if is_last {
556            apply_step_u8(step, &current, dst, width, current_desc, next_desc);
557        } else {
558            let mut next = vec![0u8; next_len];
559            apply_step_u8(step, &current, &mut next, width, current_desc, next_desc);
560            current = next;
561            current_desc = next_desc;
562        }
563    }
564}
565
566/// Compute the descriptor after applying one step.
567fn intermediate_desc(current: PixelDescriptor, step: ConvertStep) -> PixelDescriptor {
568    match step {
569        ConvertStep::Identity => current,
570        ConvertStep::SwizzleBgraRgba => {
571            let new_layout = match current.layout() {
572                ChannelLayout::Bgra => ChannelLayout::Rgba,
573                ChannelLayout::Rgba => ChannelLayout::Bgra,
574                other => other,
575            };
576            PixelDescriptor::new(
577                current.channel_type(),
578                new_layout,
579                current.alpha(),
580                current.transfer(),
581            )
582        }
583        ConvertStep::AddAlpha => PixelDescriptor::new(
584            current.channel_type(),
585            ChannelLayout::Rgba,
586            Some(AlphaMode::Straight),
587            current.transfer(),
588        ),
589        ConvertStep::DropAlpha => PixelDescriptor::new(
590            current.channel_type(),
591            ChannelLayout::Rgb,
592            None,
593            current.transfer(),
594        ),
595        ConvertStep::GrayToRgb => PixelDescriptor::new(
596            current.channel_type(),
597            ChannelLayout::Rgb,
598            None,
599            current.transfer(),
600        ),
601        ConvertStep::GrayToRgba => PixelDescriptor::new(
602            current.channel_type(),
603            ChannelLayout::Rgba,
604            Some(AlphaMode::Straight),
605            current.transfer(),
606        ),
607        ConvertStep::RgbToGray | ConvertStep::RgbaToGray => PixelDescriptor::new(
608            current.channel_type(),
609            ChannelLayout::Gray,
610            None,
611            current.transfer(),
612        ),
613        ConvertStep::GrayAlphaToRgba => PixelDescriptor::new(
614            current.channel_type(),
615            ChannelLayout::Rgba,
616            current.alpha(),
617            current.transfer(),
618        ),
619        ConvertStep::GrayAlphaToRgb => PixelDescriptor::new(
620            current.channel_type(),
621            ChannelLayout::Rgb,
622            None,
623            current.transfer(),
624        ),
625        ConvertStep::GrayToGrayAlpha => PixelDescriptor::new(
626            current.channel_type(),
627            ChannelLayout::GrayAlpha,
628            Some(AlphaMode::Straight),
629            current.transfer(),
630        ),
631        ConvertStep::GrayAlphaToGray => PixelDescriptor::new(
632            current.channel_type(),
633            ChannelLayout::Gray,
634            None,
635            current.transfer(),
636        ),
637        ConvertStep::SrgbU8ToLinearF32
638        | ConvertStep::NaiveU8ToF32
639        | ConvertStep::U16ToF32
640        | ConvertStep::PqU16ToLinearF32
641        | ConvertStep::HlgU16ToLinearF32
642        | ConvertStep::PqF32ToLinearF32
643        | ConvertStep::HlgF32ToLinearF32 => PixelDescriptor::new(
644            ChannelType::F32,
645            current.layout(),
646            current.alpha(),
647            TransferFunction::Linear,
648        ),
649        ConvertStep::LinearF32ToSrgbU8 | ConvertStep::NaiveF32ToU8 | ConvertStep::U16ToU8 => {
650            PixelDescriptor::new(
651                ChannelType::U8,
652                current.layout(),
653                current.alpha(),
654                TransferFunction::Srgb,
655            )
656        }
657        ConvertStep::U8ToU16 => PixelDescriptor::new(
658            ChannelType::U16,
659            current.layout(),
660            current.alpha(),
661            current.transfer(),
662        ),
663        ConvertStep::F32ToU16 | ConvertStep::LinearF32ToPqU16 | ConvertStep::LinearF32ToHlgU16 => {
664            let tf = match step {
665                ConvertStep::LinearF32ToPqU16 => TransferFunction::Pq,
666                ConvertStep::LinearF32ToHlgU16 => TransferFunction::Hlg,
667                _ => current.transfer(),
668            };
669            PixelDescriptor::new(ChannelType::U16, current.layout(), current.alpha(), tf)
670        }
671        ConvertStep::LinearF32ToPqF32 => PixelDescriptor::new(
672            ChannelType::F32,
673            current.layout(),
674            current.alpha(),
675            TransferFunction::Pq,
676        ),
677        ConvertStep::LinearF32ToHlgF32 => PixelDescriptor::new(
678            ChannelType::F32,
679            current.layout(),
680            current.alpha(),
681            TransferFunction::Hlg,
682        ),
683        ConvertStep::StraightToPremul => PixelDescriptor::new(
684            current.channel_type(),
685            current.layout(),
686            Some(AlphaMode::Premultiplied),
687            current.transfer(),
688        ),
689        ConvertStep::PremulToStraight => PixelDescriptor::new(
690            current.channel_type(),
691            current.layout(),
692            Some(AlphaMode::Straight),
693            current.transfer(),
694        ),
695        ConvertStep::LinearRgbToOklab => PixelDescriptor::new(
696            ChannelType::F32,
697            ChannelLayout::Oklab,
698            None,
699            TransferFunction::Unknown,
700        )
701        .with_primaries(current.primaries),
702        ConvertStep::OklabToLinearRgb => PixelDescriptor::new(
703            ChannelType::F32,
704            ChannelLayout::Rgb,
705            None,
706            TransferFunction::Linear,
707        )
708        .with_primaries(current.primaries),
709        ConvertStep::LinearRgbaToOklaba => PixelDescriptor::new(
710            ChannelType::F32,
711            ChannelLayout::OklabA,
712            Some(AlphaMode::Straight),
713            TransferFunction::Unknown,
714        )
715        .with_primaries(current.primaries),
716        ConvertStep::OklabaToLinearRgba => PixelDescriptor::new(
717            ChannelType::F32,
718            ChannelLayout::Rgba,
719            current.alpha(),
720            TransferFunction::Linear,
721        )
722        .with_primaries(current.primaries),
723    }
724}
725
726/// Apply a single conversion step on raw byte slices.
727fn apply_step_u8(
728    step: ConvertStep,
729    src: &[u8],
730    dst: &mut [u8],
731    width: u32,
732    from: PixelDescriptor,
733    _to: PixelDescriptor,
734) {
735    let w = width as usize;
736
737    match step {
738        ConvertStep::Identity => {
739            let len = min(src.len(), dst.len());
740            dst[..len].copy_from_slice(&src[..len]);
741        }
742
743        ConvertStep::SwizzleBgraRgba => {
744            swizzle_bgra_rgba(src, dst, w, from.channel_type());
745        }
746
747        ConvertStep::AddAlpha => {
748            add_alpha(src, dst, w, from.channel_type());
749        }
750
751        ConvertStep::DropAlpha => {
752            drop_alpha(src, dst, w, from.channel_type());
753        }
754
755        ConvertStep::GrayToRgb => {
756            gray_to_rgb(src, dst, w, from.channel_type());
757        }
758
759        ConvertStep::GrayToRgba => {
760            gray_to_rgba(src, dst, w, from.channel_type());
761        }
762
763        ConvertStep::RgbToGray => {
764            rgb_to_gray_u8(src, dst, w);
765        }
766
767        ConvertStep::RgbaToGray => {
768            rgba_to_gray_u8(src, dst, w);
769        }
770
771        ConvertStep::GrayAlphaToRgba => {
772            gray_alpha_to_rgba(src, dst, w, from.channel_type());
773        }
774
775        ConvertStep::GrayAlphaToRgb => {
776            gray_alpha_to_rgb(src, dst, w, from.channel_type());
777        }
778
779        ConvertStep::GrayToGrayAlpha => {
780            gray_to_gray_alpha(src, dst, w, from.channel_type());
781        }
782
783        ConvertStep::GrayAlphaToGray => {
784            gray_alpha_to_gray(src, dst, w, from.channel_type());
785        }
786
787        ConvertStep::SrgbU8ToLinearF32 => {
788            srgb_u8_to_linear_f32(src, dst, w, from.layout().channels());
789        }
790
791        ConvertStep::LinearF32ToSrgbU8 => {
792            linear_f32_to_srgb_u8(src, dst, w, from.layout().channels());
793        }
794
795        ConvertStep::NaiveU8ToF32 => {
796            naive_u8_to_f32(src, dst, w, from.layout().channels());
797        }
798
799        ConvertStep::NaiveF32ToU8 => {
800            naive_f32_to_u8(src, dst, w, from.layout().channels());
801        }
802
803        ConvertStep::U16ToU8 => {
804            u16_to_u8(src, dst, w, from.layout().channels());
805        }
806
807        ConvertStep::U8ToU16 => {
808            u8_to_u16(src, dst, w, from.layout().channels());
809        }
810
811        ConvertStep::U16ToF32 => {
812            u16_to_f32(src, dst, w, from.layout().channels());
813        }
814
815        ConvertStep::F32ToU16 => {
816            f32_to_u16(src, dst, w, from.layout().channels());
817        }
818
819        ConvertStep::PqU16ToLinearF32 => {
820            pq_u16_to_linear_f32(src, dst, w, from.layout().channels());
821        }
822
823        ConvertStep::LinearF32ToPqU16 => {
824            linear_f32_to_pq_u16(src, dst, w, from.layout().channels());
825        }
826
827        ConvertStep::PqF32ToLinearF32 => {
828            pq_f32_to_linear_f32(src, dst, w, from.layout().channels());
829        }
830
831        ConvertStep::LinearF32ToPqF32 => {
832            linear_f32_to_pq_f32(src, dst, w, from.layout().channels());
833        }
834
835        ConvertStep::HlgU16ToLinearF32 => {
836            hlg_u16_to_linear_f32(src, dst, w, from.layout().channels());
837        }
838
839        ConvertStep::LinearF32ToHlgU16 => {
840            linear_f32_to_hlg_u16(src, dst, w, from.layout().channels());
841        }
842
843        ConvertStep::HlgF32ToLinearF32 => {
844            hlg_f32_to_linear_f32(src, dst, w, from.layout().channels());
845        }
846
847        ConvertStep::LinearF32ToHlgF32 => {
848            linear_f32_to_hlg_f32(src, dst, w, from.layout().channels());
849        }
850
851        ConvertStep::StraightToPremul => {
852            straight_to_premul(src, dst, w, from.channel_type(), from.layout());
853        }
854
855        ConvertStep::PremulToStraight => {
856            premul_to_straight(src, dst, w, from.channel_type(), from.layout());
857        }
858
859        ConvertStep::LinearRgbToOklab => {
860            linear_rgb_to_oklab_f32(src, dst, w, from.primaries);
861        }
862
863        ConvertStep::OklabToLinearRgb => {
864            oklab_to_linear_rgb_f32(src, dst, w, from.primaries);
865        }
866
867        ConvertStep::LinearRgbaToOklaba => {
868            linear_rgba_to_oklaba_f32(src, dst, w, from.primaries);
869        }
870
871        ConvertStep::OklabaToLinearRgba => {
872            oklaba_to_linear_rgba_f32(src, dst, w, from.primaries);
873        }
874    }
875}
876
877// ---------------------------------------------------------------------------
878// Kernel implementations
879// ---------------------------------------------------------------------------
880
881/// BGRA ↔ RGBA swizzle.
882fn swizzle_bgra_rgba(src: &[u8], dst: &mut [u8], width: usize, ch_type: ChannelType) {
883    let bps = ch_type.byte_size(); // bytes per sample
884    let pixel_bytes = 4 * bps;
885
886    match ch_type {
887        ChannelType::U8 => {
888            for i in 0..width {
889                let s = i * pixel_bytes;
890                let d = i * pixel_bytes;
891                dst[d] = src[s + 2]; // R ← B (or B ← R)
892                dst[d + 1] = src[s + 1]; // G ← G
893                dst[d + 2] = src[s]; // B ← R (or R ← B)
894                dst[d + 3] = src[s + 3]; // A ← A
895            }
896        }
897        ChannelType::U16 => {
898            let src16: &[u16] = bytemuck::cast_slice(&src[..width * pixel_bytes]);
899            let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..width * pixel_bytes]);
900            for i in 0..width {
901                let s = i * 4;
902                dst16[s] = src16[s + 2];
903                dst16[s + 1] = src16[s + 1];
904                dst16[s + 2] = src16[s];
905                dst16[s + 3] = src16[s + 3];
906            }
907        }
908        ChannelType::F32 => {
909            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * pixel_bytes]);
910            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * pixel_bytes]);
911            for i in 0..width {
912                let s = i * 4;
913                dstf[s] = srcf[s + 2];
914                dstf[s + 1] = srcf[s + 1];
915                dstf[s + 2] = srcf[s];
916                dstf[s + 3] = srcf[s + 3];
917            }
918        }
919        _ => {}
920    }
921}
922
923/// Add opaque alpha channel (3ch → 4ch).
924fn add_alpha(src: &[u8], dst: &mut [u8], width: usize, ch_type: ChannelType) {
925    match ch_type {
926        ChannelType::U8 => {
927            for i in 0..width {
928                dst[i * 4] = src[i * 3];
929                dst[i * 4 + 1] = src[i * 3 + 1];
930                dst[i * 4 + 2] = src[i * 3 + 2];
931                dst[i * 4 + 3] = 255;
932            }
933        }
934        ChannelType::U16 => {
935            let src16: &[u16] = bytemuck::cast_slice(&src[..width * 6]);
936            let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..width * 8]);
937            for i in 0..width {
938                dst16[i * 4] = src16[i * 3];
939                dst16[i * 4 + 1] = src16[i * 3 + 1];
940                dst16[i * 4 + 2] = src16[i * 3 + 2];
941                dst16[i * 4 + 3] = 65535;
942            }
943        }
944        ChannelType::F32 => {
945            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 12]);
946            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 16]);
947            for i in 0..width {
948                dstf[i * 4] = srcf[i * 3];
949                dstf[i * 4 + 1] = srcf[i * 3 + 1];
950                dstf[i * 4 + 2] = srcf[i * 3 + 2];
951                dstf[i * 4 + 3] = 1.0;
952            }
953        }
954        _ => {}
955    }
956}
957
958/// Drop alpha channel (4ch → 3ch).
959fn drop_alpha(src: &[u8], dst: &mut [u8], width: usize, ch_type: ChannelType) {
960    match ch_type {
961        ChannelType::U8 => {
962            for i in 0..width {
963                dst[i * 3] = src[i * 4];
964                dst[i * 3 + 1] = src[i * 4 + 1];
965                dst[i * 3 + 2] = src[i * 4 + 2];
966            }
967        }
968        ChannelType::U16 => {
969            let src16: &[u16] = bytemuck::cast_slice(&src[..width * 8]);
970            let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..width * 6]);
971            for i in 0..width {
972                dst16[i * 3] = src16[i * 4];
973                dst16[i * 3 + 1] = src16[i * 4 + 1];
974                dst16[i * 3 + 2] = src16[i * 4 + 2];
975            }
976        }
977        ChannelType::F32 => {
978            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 16]);
979            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 12]);
980            for i in 0..width {
981                dstf[i * 3] = srcf[i * 4];
982                dstf[i * 3 + 1] = srcf[i * 4 + 1];
983                dstf[i * 3 + 2] = srcf[i * 4 + 2];
984            }
985        }
986        _ => {}
987    }
988}
989
990/// Gray → RGB (replicate).
991fn gray_to_rgb(src: &[u8], dst: &mut [u8], width: usize, ch_type: ChannelType) {
992    match ch_type {
993        ChannelType::U8 => {
994            for i in 0..width {
995                let g = src[i];
996                dst[i * 3] = g;
997                dst[i * 3 + 1] = g;
998                dst[i * 3 + 2] = g;
999            }
1000        }
1001        ChannelType::U16 => {
1002            let src16: &[u16] = bytemuck::cast_slice(&src[..width * 2]);
1003            let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..width * 6]);
1004            for i in 0..width {
1005                let g = src16[i];
1006                dst16[i * 3] = g;
1007                dst16[i * 3 + 1] = g;
1008                dst16[i * 3 + 2] = g;
1009            }
1010        }
1011        ChannelType::F32 => {
1012            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 4]);
1013            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 12]);
1014            for i in 0..width {
1015                let g = srcf[i];
1016                dstf[i * 3] = g;
1017                dstf[i * 3 + 1] = g;
1018                dstf[i * 3 + 2] = g;
1019            }
1020        }
1021        _ => {}
1022    }
1023}
1024
1025/// Gray → RGBA (replicate + opaque alpha).
1026fn gray_to_rgba(src: &[u8], dst: &mut [u8], width: usize, ch_type: ChannelType) {
1027    match ch_type {
1028        ChannelType::U8 => {
1029            for i in 0..width {
1030                let g = src[i];
1031                dst[i * 4] = g;
1032                dst[i * 4 + 1] = g;
1033                dst[i * 4 + 2] = g;
1034                dst[i * 4 + 3] = 255;
1035            }
1036        }
1037        ChannelType::U16 => {
1038            let src16: &[u16] = bytemuck::cast_slice(&src[..width * 2]);
1039            let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..width * 8]);
1040            for i in 0..width {
1041                let g = src16[i];
1042                dst16[i * 4] = g;
1043                dst16[i * 4 + 1] = g;
1044                dst16[i * 4 + 2] = g;
1045                dst16[i * 4 + 3] = 65535;
1046            }
1047        }
1048        ChannelType::F32 => {
1049            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 4]);
1050            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 16]);
1051            for i in 0..width {
1052                let g = srcf[i];
1053                dstf[i * 4] = g;
1054                dstf[i * 4 + 1] = g;
1055                dstf[i * 4 + 2] = g;
1056                dstf[i * 4 + 3] = 1.0;
1057            }
1058        }
1059        _ => {}
1060    }
1061}
1062
/// RGB → Gray for u8 rows, weighting the channels with BT.709 luma coefficients.
///
/// The weights 0.2126 / 0.7152 / 0.0722 are approximated in 8-bit fixed point
/// as 54 / 183 / 19 (summing to 256); adding 128 before the shift rounds the
/// result to the nearest integer.
///
/// # Panics
///
/// Panics if `src` holds fewer than `width` RGB pixels or `dst` fewer than
/// `width` bytes.
fn rgb_to_gray_u8(src: &[u8], dst: &mut [u8], width: usize) {
    let rgb_pixels = src[..width * 3].chunks_exact(3);
    let gray_out = dst[..width].iter_mut();

    for (luma, px) in gray_out.zip(rgb_pixels) {
        let weighted = 54 * u32::from(px[0]) + 183 * u32::from(px[1]) + 19 * u32::from(px[2]);
        *luma = ((weighted + 128) >> 8) as u8;
    }
}
1074
/// RGBA → Gray for u8 rows: BT.709 luma of the color channels, alpha ignored.
///
/// Uses the fixed-point weights 54 / 183 / 19 (summing to 256) with a `+128`
/// rounding term before the 8-bit shift. The fourth sample of each pixel is
/// never read.
///
/// # Panics
///
/// Panics if `src` holds fewer than `width` RGBA pixels or `dst` fewer than
/// `width` bytes.
fn rgba_to_gray_u8(src: &[u8], dst: &mut [u8], width: usize) {
    let rgba_pixels = src[..width * 4].chunks_exact(4);
    let gray_out = dst[..width].iter_mut();

    for (luma, px) in gray_out.zip(rgba_pixels) {
        let weighted = 54 * u32::from(px[0]) + 183 * u32::from(px[1]) + 19 * u32::from(px[2]);
        *luma = ((weighted + 128) >> 8) as u8;
    }
}
1084
1085/// GrayAlpha → RGBA (replicate gray, preserve alpha).
1086fn gray_alpha_to_rgba(src: &[u8], dst: &mut [u8], width: usize, ch_type: ChannelType) {
1087    match ch_type {
1088        ChannelType::U8 => {
1089            for i in 0..width {
1090                let g = src[i * 2];
1091                let a = src[i * 2 + 1];
1092                dst[i * 4] = g;
1093                dst[i * 4 + 1] = g;
1094                dst[i * 4 + 2] = g;
1095                dst[i * 4 + 3] = a;
1096            }
1097        }
1098        ChannelType::U16 => {
1099            let src16: &[u16] = bytemuck::cast_slice(&src[..width * 4]);
1100            let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..width * 8]);
1101            for i in 0..width {
1102                let g = src16[i * 2];
1103                let a = src16[i * 2 + 1];
1104                dst16[i * 4] = g;
1105                dst16[i * 4 + 1] = g;
1106                dst16[i * 4 + 2] = g;
1107                dst16[i * 4 + 3] = a;
1108            }
1109        }
1110        ChannelType::F32 => {
1111            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 8]);
1112            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 16]);
1113            for i in 0..width {
1114                let g = srcf[i * 2];
1115                let a = srcf[i * 2 + 1];
1116                dstf[i * 4] = g;
1117                dstf[i * 4 + 1] = g;
1118                dstf[i * 4 + 2] = g;
1119                dstf[i * 4 + 3] = a;
1120            }
1121        }
1122        _ => {}
1123    }
1124}
1125
1126/// GrayAlpha → RGB (replicate gray, drop alpha).
1127fn gray_alpha_to_rgb(src: &[u8], dst: &mut [u8], width: usize, ch_type: ChannelType) {
1128    match ch_type {
1129        ChannelType::U8 => {
1130            for i in 0..width {
1131                let g = src[i * 2];
1132                dst[i * 3] = g;
1133                dst[i * 3 + 1] = g;
1134                dst[i * 3 + 2] = g;
1135            }
1136        }
1137        ChannelType::U16 => {
1138            let src16: &[u16] = bytemuck::cast_slice(&src[..width * 4]);
1139            let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..width * 6]);
1140            for i in 0..width {
1141                let g = src16[i * 2];
1142                dst16[i * 3] = g;
1143                dst16[i * 3 + 1] = g;
1144                dst16[i * 3 + 2] = g;
1145            }
1146        }
1147        ChannelType::F32 => {
1148            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 8]);
1149            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 12]);
1150            for i in 0..width {
1151                let g = srcf[i * 2];
1152                dstf[i * 3] = g;
1153                dstf[i * 3 + 1] = g;
1154                dstf[i * 3 + 2] = g;
1155            }
1156        }
1157        _ => {}
1158    }
1159}
1160
1161/// Gray → GrayAlpha (add opaque alpha).
1162fn gray_to_gray_alpha(src: &[u8], dst: &mut [u8], width: usize, ch_type: ChannelType) {
1163    match ch_type {
1164        ChannelType::U8 => {
1165            for i in 0..width {
1166                dst[i * 2] = src[i];
1167                dst[i * 2 + 1] = 255;
1168            }
1169        }
1170        ChannelType::U16 => {
1171            let src16: &[u16] = bytemuck::cast_slice(&src[..width * 2]);
1172            let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..width * 4]);
1173            for i in 0..width {
1174                dst16[i * 2] = src16[i];
1175                dst16[i * 2 + 1] = 65535;
1176            }
1177        }
1178        ChannelType::F32 => {
1179            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 4]);
1180            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 8]);
1181            for i in 0..width {
1182                dstf[i * 2] = srcf[i];
1183                dstf[i * 2 + 1] = 1.0;
1184            }
1185        }
1186        _ => {}
1187    }
1188}
1189
1190/// GrayAlpha → Gray (drop alpha).
1191fn gray_alpha_to_gray(src: &[u8], dst: &mut [u8], width: usize, ch_type: ChannelType) {
1192    match ch_type {
1193        ChannelType::U8 => {
1194            for i in 0..width {
1195                dst[i] = src[i * 2];
1196            }
1197        }
1198        ChannelType::U16 => {
1199            let src16: &[u16] = bytemuck::cast_slice(&src[..width * 4]);
1200            let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..width * 2]);
1201            for i in 0..width {
1202                dst16[i] = src16[i * 2];
1203            }
1204        }
1205        ChannelType::F32 => {
1206            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 8]);
1207            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 4]);
1208            for i in 0..width {
1209                dstf[i] = srcf[i * 2];
1210            }
1211        }
1212        _ => {}
1213    }
1214}
1215
1216// ---------------------------------------------------------------------------
1217// Depth conversion kernels (transfer-function-aware)
1218// ---------------------------------------------------------------------------
1219
1220/// sRGB u8 → linear f32 using `linear-srgb` SIMD batch conversion.
1221fn srgb_u8_to_linear_f32(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1222    let count = width * channels;
1223    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..count * 4]);
1224    linear_srgb::default::srgb_u8_to_linear_slice(&src[..count], dstf);
1225}
1226
1227/// Linear f32 → sRGB u8 using `linear-srgb` SIMD batch conversion.
1228fn linear_f32_to_srgb_u8(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1229    let count = width * channels;
1230    let srcf: &[f32] = bytemuck::cast_slice(&src[..count * 4]);
1231    linear_srgb::default::linear_to_srgb_u8_slice(srcf, &mut dst[..count]);
1232}
1233
1234/// Naive u8 → f32 (v / 255.0, no transfer function).
1235fn naive_u8_to_f32(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1236    let count = width * channels;
1237    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..count * 4]);
1238    for i in 0..count {
1239        dstf[i] = src[i] as f32 / 255.0;
1240    }
1241}
1242
1243/// Naive f32 → u8 (clamp [0,1], * 255 + 0.5).
1244fn naive_f32_to_u8(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1245    let count = width * channels;
1246    let srcf: &[f32] = bytemuck::cast_slice(&src[..count * 4]);
1247    for i in 0..count {
1248        dst[i] = (srcf[i].clamp(0.0, 1.0) * 255.0 + 0.5) as u8;
1249    }
1250}
1251
1252/// u16 → u8: (v * 255 + 32768) >> 16.
1253fn u16_to_u8(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1254    let count = width * channels;
1255    let src16: &[u16] = bytemuck::cast_slice(&src[..count * 2]);
1256    for i in 0..count {
1257        dst[i] = ((src16[i] as u32 * 255 + 32768) >> 16) as u8;
1258    }
1259}
1260
1261/// u8 → u16: v * 257.
1262fn u8_to_u16(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1263    let count = width * channels;
1264    let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..count * 2]);
1265    for i in 0..count {
1266        dst16[i] = src[i] as u16 * 257;
1267    }
1268}
1269
1270/// u16 → f32: v / 65535.0.
1271fn u16_to_f32(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1272    let count = width * channels;
1273    let src16: &[u16] = bytemuck::cast_slice(&src[..count * 2]);
1274    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..count * 4]);
1275    for i in 0..count {
1276        dstf[i] = src16[i] as f32 / 65535.0;
1277    }
1278}
1279
1280/// f32 → u16: clamp [0,1], * 65535 + 0.5.
1281fn f32_to_u16(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1282    let count = width * channels;
1283    let srcf: &[f32] = bytemuck::cast_slice(&src[..count * 4]);
1284    let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..count * 2]);
1285    for i in 0..count {
1286        dst16[i] = (srcf[i].clamp(0.0, 1.0) * 65535.0 + 0.5) as u16;
1287    }
1288}
1289
1290// ---------------------------------------------------------------------------
1291// PQ (SMPTE ST 2084) transfer function — delegates to linear-srgb
1292// ---------------------------------------------------------------------------
1293
1294/// PQ EOTF: encoded [0,1] → linear light [0,1] (where 1.0 = 10000 cd/m²).
1295///
1296/// Uses rational polynomial from `linear-srgb` (no `powf` calls).
1297#[inline]
1298pub(crate) fn pq_eotf(v: f32) -> f32 {
1299    linear_srgb::tf::pq_to_linear(v)
1300}
1301
1302/// PQ inverse EOTF (OETF): linear light [0,1] → encoded [0,1].
1303///
1304/// Uses rational polynomial from `linear-srgb` (no `powf` calls).
1305#[inline]
1306pub(crate) fn pq_oetf(v: f32) -> f32 {
1307    linear_srgb::tf::linear_to_pq(v)
1308}
1309
1310/// PQ U16 → Linear F32 (EOTF applied during depth conversion).
1311fn pq_u16_to_linear_f32(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1312    let count = width * channels;
1313    let src16: &[u16] = bytemuck::cast_slice(&src[..count * 2]);
1314    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..count * 4]);
1315    for i in 0..count {
1316        let normalized = src16[i] as f32 / 65535.0;
1317        dstf[i] = pq_eotf(normalized);
1318    }
1319}
1320
1321/// Linear F32 → PQ U16 (OETF applied during depth conversion).
1322fn linear_f32_to_pq_u16(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1323    let count = width * channels;
1324    let srcf: &[f32] = bytemuck::cast_slice(&src[..count * 4]);
1325    let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..count * 2]);
1326    for i in 0..count {
1327        let encoded = pq_oetf(srcf[i].max(0.0));
1328        dst16[i] = (encoded.clamp(0.0, 1.0) * 65535.0 + 0.5) as u16;
1329    }
1330}
1331
1332/// PQ F32 → Linear F32 (EOTF, same depth).
1333fn pq_f32_to_linear_f32(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1334    let count = width * channels;
1335    let srcf: &[f32] = bytemuck::cast_slice(&src[..count * 4]);
1336    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..count * 4]);
1337    for i in 0..count {
1338        dstf[i] = pq_eotf(srcf[i]);
1339    }
1340}
1341
1342/// Linear F32 → PQ F32 (OETF, same depth).
1343fn linear_f32_to_pq_f32(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1344    let count = width * channels;
1345    let srcf: &[f32] = bytemuck::cast_slice(&src[..count * 4]);
1346    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..count * 4]);
1347    for i in 0..count {
1348        dstf[i] = pq_oetf(srcf[i].max(0.0));
1349    }
1350}
1351
1352// ---------------------------------------------------------------------------
1353// HLG (ARIB STD-B67) transfer function — delegates to linear-srgb
1354// ---------------------------------------------------------------------------
1355
1356/// HLG OETF: scene-linear [0,1] → encoded [0,1].
1357///
1358/// Uses `fast_log2f` from `linear-srgb` (no `libm` ln calls).
1359#[inline]
1360pub(crate) fn hlg_oetf(v: f32) -> f32 {
1361    linear_srgb::tf::linear_to_hlg(v)
1362}
1363
1364/// HLG inverse OETF (EOTF): encoded [0,1] → scene-linear [0,1].
1365///
1366/// Uses `fast_pow2f` from `linear-srgb` (no `libm` exp calls).
1367#[inline]
1368pub(crate) fn hlg_eotf(v: f32) -> f32 {
1369    linear_srgb::tf::hlg_to_linear(v)
1370}
1371
1372/// HLG U16 → Linear F32 (EOTF applied during depth conversion).
1373fn hlg_u16_to_linear_f32(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1374    let count = width * channels;
1375    let src16: &[u16] = bytemuck::cast_slice(&src[..count * 2]);
1376    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..count * 4]);
1377    for i in 0..count {
1378        let normalized = src16[i] as f32 / 65535.0;
1379        dstf[i] = hlg_eotf(normalized);
1380    }
1381}
1382
1383/// Linear F32 → HLG U16 (OETF applied during depth conversion).
1384fn linear_f32_to_hlg_u16(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1385    let count = width * channels;
1386    let srcf: &[f32] = bytemuck::cast_slice(&src[..count * 4]);
1387    let dst16: &mut [u16] = bytemuck::cast_slice_mut(&mut dst[..count * 2]);
1388    for i in 0..count {
1389        let encoded = hlg_oetf(srcf[i]);
1390        dst16[i] = (encoded.clamp(0.0, 1.0) * 65535.0 + 0.5) as u16;
1391    }
1392}
1393
1394/// HLG F32 → Linear F32 (EOTF, same depth).
1395fn hlg_f32_to_linear_f32(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1396    let count = width * channels;
1397    let srcf: &[f32] = bytemuck::cast_slice(&src[..count * 4]);
1398    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..count * 4]);
1399    for i in 0..count {
1400        dstf[i] = hlg_eotf(srcf[i]);
1401    }
1402}
1403
1404/// Linear F32 → HLG F32 (OETF, same depth).
1405fn linear_f32_to_hlg_f32(src: &[u8], dst: &mut [u8], width: usize, channels: usize) {
1406    let count = width * channels;
1407    let srcf: &[f32] = bytemuck::cast_slice(&src[..count * 4]);
1408    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..count * 4]);
1409    for i in 0..count {
1410        dstf[i] = hlg_oetf(srcf[i]);
1411    }
1412}
1413
1414// ---------------------------------------------------------------------------
1415// Alpha premultiplication
1416// ---------------------------------------------------------------------------
1417
1418/// Straight → Premultiplied alpha (in-place copy from src to dst).
1419fn straight_to_premul(
1420    src: &[u8],
1421    dst: &mut [u8],
1422    width: usize,
1423    ch_type: ChannelType,
1424    layout: ChannelLayout,
1425) {
1426    let channels = layout.channels();
1427    let alpha_idx = channels - 1;
1428
1429    match ch_type {
1430        ChannelType::U8 => {
1431            for i in 0..width {
1432                let base = i * channels;
1433                let a = src[base + alpha_idx] as u32;
1434                for c in 0..alpha_idx {
1435                    dst[base + c] = ((src[base + c] as u32 * a + 128) / 255) as u8;
1436                }
1437                dst[base + alpha_idx] = src[base + alpha_idx];
1438            }
1439        }
1440        ChannelType::F32 => {
1441            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * channels * 4]);
1442            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * channels * 4]);
1443            for i in 0..width {
1444                let base = i * channels;
1445                let a = srcf[base + alpha_idx];
1446                for c in 0..alpha_idx {
1447                    dstf[base + c] = srcf[base + c] * a;
1448                }
1449                dstf[base + alpha_idx] = a;
1450            }
1451        }
1452        _ => {
1453            // Fallback: copy.
1454            let len = min(src.len(), dst.len());
1455            dst[..len].copy_from_slice(&src[..len]);
1456        }
1457    }
1458}
1459
1460/// Premultiplied → Straight alpha.
1461fn premul_to_straight(
1462    src: &[u8],
1463    dst: &mut [u8],
1464    width: usize,
1465    ch_type: ChannelType,
1466    layout: ChannelLayout,
1467) {
1468    let channels = layout.channels();
1469    let alpha_idx = channels - 1;
1470
1471    match ch_type {
1472        ChannelType::U8 => {
1473            for i in 0..width {
1474                let base = i * channels;
1475                let a = src[base + alpha_idx];
1476                if a == 0 {
1477                    for c in 0..channels {
1478                        dst[base + c] = 0;
1479                    }
1480                } else {
1481                    let a32 = a as u32;
1482                    for c in 0..alpha_idx {
1483                        dst[base + c] = ((src[base + c] as u32 * 255 + a32 / 2) / a32) as u8;
1484                    }
1485                    dst[base + alpha_idx] = a;
1486                }
1487            }
1488        }
1489        ChannelType::F32 => {
1490            let srcf: &[f32] = bytemuck::cast_slice(&src[..width * channels * 4]);
1491            let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * channels * 4]);
1492            for i in 0..width {
1493                let base = i * channels;
1494                let a = srcf[base + alpha_idx];
1495                if a == 0.0 {
1496                    for c in 0..channels {
1497                        dstf[base + c] = 0.0;
1498                    }
1499                } else {
1500                    let inv_a = 1.0 / a;
1501                    for c in 0..alpha_idx {
1502                        dstf[base + c] = srcf[base + c] * inv_a;
1503                    }
1504                    dstf[base + alpha_idx] = a;
1505                }
1506            }
1507        }
1508        _ => {
1509            let len = min(src.len(), dst.len());
1510            dst[..len].copy_from_slice(&src[..len]);
1511        }
1512    }
1513}
1514
1515// ---------------------------------------------------------------------------
1516// Oklab conversion kernels
1517// ---------------------------------------------------------------------------
1518
1519use crate::oklab::{lms_to_rgb_matrix, oklab_to_rgb, rgb_to_lms_matrix, rgb_to_oklab};
1520
1521/// Linear RGB f32 → Oklab f32 (3 channels).
1522///
1523/// # Panics
1524///
1525/// Panics if `primaries` is `Unknown`. The plan should have rejected this.
1526fn linear_rgb_to_oklab_f32(src: &[u8], dst: &mut [u8], width: usize, primaries: ColorPrimaries) {
1527    let m1 = rgb_to_lms_matrix(primaries)
1528        .expect("Oklab conversion requires known primaries (plan should have rejected Unknown)");
1529
1530    let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 12]);
1531    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 12]);
1532
1533    for i in 0..width {
1534        let s = i * 3;
1535        let [l, a, b] = rgb_to_oklab(srcf[s], srcf[s + 1], srcf[s + 2], &m1);
1536        dstf[s] = l;
1537        dstf[s + 1] = a;
1538        dstf[s + 2] = b;
1539    }
1540}
1541
1542/// Oklab f32 → Linear RGB f32 (3 channels).
1543fn oklab_to_linear_rgb_f32(src: &[u8], dst: &mut [u8], width: usize, primaries: ColorPrimaries) {
1544    let m1_inv = lms_to_rgb_matrix(primaries)
1545        .expect("Oklab conversion requires known primaries (plan should have rejected Unknown)");
1546
1547    let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 12]);
1548    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 12]);
1549
1550    for i in 0..width {
1551        let s = i * 3;
1552        let [r, g, b] = oklab_to_rgb(srcf[s], srcf[s + 1], srcf[s + 2], &m1_inv);
1553        dstf[s] = r;
1554        dstf[s + 1] = g;
1555        dstf[s + 2] = b;
1556    }
1557}
1558
1559/// Linear RGBA f32 → Oklaba f32 (4 channels, alpha preserved).
1560fn linear_rgba_to_oklaba_f32(src: &[u8], dst: &mut [u8], width: usize, primaries: ColorPrimaries) {
1561    let m1 = rgb_to_lms_matrix(primaries)
1562        .expect("Oklab conversion requires known primaries (plan should have rejected Unknown)");
1563
1564    let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 16]);
1565    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 16]);
1566
1567    for i in 0..width {
1568        let s = i * 4;
1569        let [l, a, b] = rgb_to_oklab(srcf[s], srcf[s + 1], srcf[s + 2], &m1);
1570        dstf[s] = l;
1571        dstf[s + 1] = a;
1572        dstf[s + 2] = b;
1573        dstf[s + 3] = srcf[s + 3]; // alpha unchanged
1574    }
1575}
1576
1577/// Oklaba f32 → Linear RGBA f32 (4 channels, alpha preserved).
1578fn oklaba_to_linear_rgba_f32(src: &[u8], dst: &mut [u8], width: usize, primaries: ColorPrimaries) {
1579    let m1_inv = lms_to_rgb_matrix(primaries)
1580        .expect("Oklab conversion requires known primaries (plan should have rejected Unknown)");
1581
1582    let srcf: &[f32] = bytemuck::cast_slice(&src[..width * 16]);
1583    let dstf: &mut [f32] = bytemuck::cast_slice_mut(&mut dst[..width * 16]);
1584
1585    for i in 0..width {
1586        let s = i * 4;
1587        let [r, g, b] = oklab_to_rgb(srcf[s], srcf[s + 1], srcf[s + 2], &m1_inv);
1588        dstf[s] = r;
1589        dstf[s + 1] = g;
1590        dstf[s + 2] = b;
1591        dstf[s + 3] = srcf[s + 3]; // alpha unchanged
1592    }
1593}