zenpixels_convert/
convert.rs

1//! Row-level pixel conversion kernels.
2//!
3//! Each kernel converts one row of `width` pixels from a source format to
4//! a destination format. Individual step kernels are pure functions with
5//! no allocation. Multi-step plans use [`ConvertScratch`] ping-pong
6//! buffers to avoid per-row heap allocation in streaming loops.
7
8use alloc::vec;
9use alloc::vec::Vec;
10use core::cmp::min;
11
12use crate::policy::{AlphaPolicy, ConvertOptions, DepthPolicy};
13use crate::{
14    AlphaMode, ChannelLayout, ChannelType, ColorPrimaries, ConvertError, PixelDescriptor,
15    TransferFunction,
16};
17use whereat::{At, ResultAtExt};
18
19/// Pre-computed conversion plan.
20///
21/// Stores the chain of steps needed to convert from one format to another.
22/// Created once, applied to every row.
23#[derive(Clone, Debug)]
24pub struct ConvertPlan {
25    pub(crate) from: PixelDescriptor,
26    pub(crate) to: PixelDescriptor,
27    pub(crate) steps: Vec<ConvertStep>,
28}
29
/// One primitive row operation inside a [`ConvertPlan`].
///
/// Steps fall into four groups: layout changes (swizzle, add/drop alpha,
/// gray expansion/reduction, Oklab), depth changes (u8/u16/f32), transfer
/// function changes (sRGB, BT.709, PQ, HLG ↔ linear) and alpha-mode changes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ConvertStep {
    /// Identity: nothing to convert.
    Identity,
    /// Byte swizzle between BGRA and RGBA (the same step serves both directions).
    SwizzleBgraRgba,
    /// 3ch → 4ch: append an opaque alpha channel.
    AddAlpha,
    /// 4ch → 3ch: discard the alpha channel.
    DropAlpha,
    /// 4ch → 3ch: composite onto a solid matte color, then discard alpha.
    ///
    /// `out[c] = (src[c] * alpha + matte[c] * (255 - alpha) + 127) / 255`
    /// at u8 depth; values are scaled for other depths.
    MatteComposite { r: u8, g: u8, b: u8 },
    /// Gray → RGB: copy the gray value into all three channels.
    GrayToRgb,
    /// Gray → RGBA: copy the gray value and append opaque alpha.
    GrayToRgba,
    /// RGB → Gray using BT.709 luma.
    RgbToGray,
    /// RGBA → Gray using BT.709 luma; alpha is discarded.
    RgbaToGray,
    /// GrayAlpha → RGBA: copy the gray value, carry alpha over.
    GrayAlphaToRgba,
    /// GrayAlpha → RGB: copy the gray value, discard alpha.
    GrayAlphaToRgb,
    /// Gray → GrayAlpha: append opaque alpha.
    GrayToGrayAlpha,
    /// GrayAlpha → Gray: discard alpha.
    GrayAlphaToGray,
    /// sRGB u8 → linear f32 (per-channel EOTF).
    SrgbU8ToLinearF32,
    /// Linear f32 → sRGB u8 (per-channel OETF).
    LinearF32ToSrgbU8,
    /// u8 → f32 without gamma: `v / 255.0`.
    NaiveU8ToF32,
    /// f32 → u8 without gamma: `clamp * 255 + 0.5`.
    NaiveF32ToU8,
    /// u16 → u8: `(v * 255 + 32768) >> 16`.
    U16ToU8,
    /// u8 → u16: `v * 257`.
    U8ToU16,
    /// u16 → f32: `v / 65535.0`.
    U16ToF32,
    /// f32 → u16: `clamp * 65535 + 0.5`.
    F32ToU16,
    /// PQ (SMPTE ST 2084) u16 → linear f32 (EOTF).
    PqU16ToLinearF32,
    /// Linear f32 → PQ u16 (inverse EOTF / OETF).
    LinearF32ToPqU16,
    /// PQ f32 in [0,1] → linear f32 (EOTF); depth is unchanged.
    PqF32ToLinearF32,
    /// Linear f32 → PQ f32 in [0,1] (OETF); depth is unchanged.
    LinearF32ToPqF32,
    /// HLG (ARIB STD-B67) u16 → linear f32 (EOTF).
    HlgU16ToLinearF32,
    /// Linear f32 → HLG u16 (OETF).
    LinearF32ToHlgU16,
    /// HLG f32 in [0,1] → linear f32 (EOTF); depth is unchanged.
    HlgF32ToLinearF32,
    /// Linear f32 → HLG f32 in [0,1] (OETF); depth is unchanged.
    LinearF32ToHlgF32,
    /// sRGB f32 in [0,1] → linear f32 (EOTF); depth is unchanged.
    SrgbF32ToLinearF32,
    /// Linear f32 → sRGB f32 in [0,1] (OETF); depth is unchanged.
    LinearF32ToSrgbF32,
    /// BT.709 f32 in [0,1] → linear f32 (EOTF); depth is unchanged.
    Bt709F32ToLinearF32,
    /// Linear f32 → BT.709 f32 in [0,1] (OETF); depth is unchanged.
    LinearF32ToBt709F32,
    /// Alpha mode: straight → premultiplied.
    StraightToPremul,
    /// Alpha mode: premultiplied → straight.
    PremulToStraight,
    /// Linear RGB f32 → Oklab f32 (3-channel color model change).
    LinearRgbToOklab,
    /// Oklab f32 → linear RGB f32 (3-channel color model change).
    OklabToLinearRgb,
    /// Linear RGBA f32 → Oklaba f32 (4-channel; alpha is preserved).
    LinearRgbaToOklaba,
    /// Oklaba f32 → linear RGBA f32 (4-channel; alpha is preserved).
    OklabaToLinearRgba,
}
115
116impl ConvertPlan {
117    /// Create a conversion plan from `from` to `to`.
118    ///
119    /// Returns `Err` if no conversion path exists.
120    #[track_caller]
121    pub fn new(from: PixelDescriptor, to: PixelDescriptor) -> Result<Self, At<ConvertError>> {
122        if from == to {
123            return Ok(Self {
124                from,
125                to,
126                steps: vec![ConvertStep::Identity],
127            });
128        }
129
130        let mut steps = Vec::with_capacity(3);
131
132        // Step 1: Layout conversion (within same depth class).
133        // Step 2: Depth conversion.
134        // Step 3: Alpha mode conversion.
135        //
136        // For cross-depth conversions, we convert layout at the source depth
137        // first, then change depth. This minimizes the number of channels
138        // we need to depth-convert.
139
140        let need_depth_change = from.channel_type() != to.channel_type();
141        let need_layout_change = from.layout() != to.layout();
142        let need_alpha_change =
143            from.alpha() != to.alpha() && from.alpha().is_some() && to.alpha().is_some();
144
145        // Depth/TF steps are needed when depth changes, or when both are F32
146        // and transfer functions differ.
147        let need_depth_or_tf = need_depth_change
148            || (from.channel_type() == ChannelType::F32 && from.transfer() != to.transfer());
149
150        // If we need to change depth AND layout, plan the optimal order.
151        if need_layout_change {
152            // When going to fewer channels, convert layout first (less depth work).
153            // When going to more channels, convert depth first (less layout work).
154            //
155            // Exception: Oklab layout steps require f32 data. When the source
156            // is integer (U8/U16) and the layout change involves Oklab, we must
157            // convert depth first regardless of channel count.
158            let src_ch = from.layout().channels();
159            let dst_ch = to.layout().channels();
160            let involves_oklab =
161                matches!(from.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA)
162                    || matches!(to.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA);
163
164            // Oklab conversion requires known primaries for the RGB→LMS matrix.
165            if involves_oklab && from.primaries == ColorPrimaries::Unknown {
166                return Err(whereat::at!(ConvertError::NoPath { from, to }));
167            }
168
169            let depth_first = need_depth_or_tf
170                && (dst_ch > src_ch || (involves_oklab && from.channel_type() != ChannelType::F32));
171
172            if depth_first {
173                // Depth first, then layout.
174                steps.extend(
175                    depth_steps(
176                        from.channel_type(),
177                        to.channel_type(),
178                        from.transfer(),
179                        to.transfer(),
180                    )
181                    .map_err(|e| whereat::at!(e))?,
182                );
183                steps.extend(layout_steps(from.layout(), to.layout()));
184            } else {
185                // Layout first, then depth.
186                steps.extend(layout_steps(from.layout(), to.layout()));
187                if need_depth_or_tf {
188                    steps.extend(
189                        depth_steps(
190                            from.channel_type(),
191                            to.channel_type(),
192                            from.transfer(),
193                            to.transfer(),
194                        )
195                        .map_err(|e| whereat::at!(e))?,
196                    );
197                }
198            }
199        } else if need_depth_or_tf {
200            steps.extend(
201                depth_steps(
202                    from.channel_type(),
203                    to.channel_type(),
204                    from.transfer(),
205                    to.transfer(),
206                )
207                .map_err(|e| whereat::at!(e))?,
208            );
209        }
210
211        // Alpha mode conversion (if both have alpha and modes differ).
212        if need_alpha_change {
213            match (from.alpha(), to.alpha()) {
214                (Some(AlphaMode::Straight), Some(AlphaMode::Premultiplied)) => {
215                    steps.push(ConvertStep::StraightToPremul);
216                }
217                (Some(AlphaMode::Premultiplied), Some(AlphaMode::Straight)) => {
218                    steps.push(ConvertStep::PremulToStraight);
219                }
220                _ => {}
221            }
222        }
223
224        if steps.is_empty() {
225            // Transfer-only difference or alpha-mode-only: identity path.
226            steps.push(ConvertStep::Identity);
227        }
228
229        Ok(Self { from, to, steps })
230    }
231
232    /// Create a conversion plan with explicit policy enforcement.
233    ///
234    /// Validates that the planned conversion steps are allowed by the given
235    /// policies before creating the plan. Returns an error if a forbidden
236    /// operation would be required.
237    #[track_caller]
238    pub fn new_explicit(
239        from: PixelDescriptor,
240        to: PixelDescriptor,
241        options: &ConvertOptions,
242    ) -> Result<Self, At<ConvertError>> {
243        // Check alpha removal policy.
244        let drops_alpha = from.alpha().is_some() && to.alpha().is_none();
245        if drops_alpha && options.alpha_policy == AlphaPolicy::Forbid {
246            return Err(whereat::at!(ConvertError::AlphaRemovalForbidden));
247        }
248
249        // Check depth reduction policy.
250        let reduces_depth = from.channel_type().byte_size() > to.channel_type().byte_size();
251        if reduces_depth && options.depth_policy == DepthPolicy::Forbid {
252            return Err(whereat::at!(ConvertError::DepthReductionForbidden));
253        }
254
255        // Check RGB→Gray requires luma coefficients.
256        let src_is_rgb = matches!(
257            from.layout(),
258            ChannelLayout::Rgb | ChannelLayout::Rgba | ChannelLayout::Bgra
259        );
260        let dst_is_gray = matches!(to.layout(), ChannelLayout::Gray | ChannelLayout::GrayAlpha);
261        if src_is_rgb && dst_is_gray && options.luma.is_none() {
262            return Err(whereat::at!(ConvertError::RgbToGray));
263        }
264
265        let mut plan = Self::new(from, to).at()?;
266
267        // Replace DropAlpha with MatteComposite when policy is CompositeOnto.
268        if drops_alpha && let AlphaPolicy::CompositeOnto { r, g, b } = options.alpha_policy {
269            for step in &mut plan.steps {
270                if matches!(step, ConvertStep::DropAlpha) {
271                    *step = ConvertStep::MatteComposite { r, g, b };
272                }
273            }
274        }
275
276        Ok(plan)
277    }
278
279    /// Compose two plans into one: apply `self` then `other`.
280    ///
281    /// The composed plan executes both conversions in a single `convert_row`
282    /// call, using one intermediate buffer instead of two. Adjacent inverse
283    /// steps are cancelled (e.g., `SrgbU8ToLinearF32` + `LinearF32ToSrgbU8`
284    /// → identity).
285    ///
286    /// Returns `None` if `self.to` != `other.from` (incompatible plans).
287    pub fn compose(&self, other: &Self) -> Option<Self> {
288        if self.to != other.from {
289            return None;
290        }
291
292        let mut steps = self.steps.clone();
293
294        // Append other's steps, skipping its Identity if present.
295        for &step in &other.steps {
296            if step == ConvertStep::Identity {
297                continue;
298            }
299            steps.push(step);
300        }
301
302        // Peephole: cancel adjacent inverse pairs.
303        let mut changed = true;
304        while changed {
305            changed = false;
306            let mut i = 0;
307            while i + 1 < steps.len() {
308                if are_inverse(steps[i], steps[i + 1]) {
309                    steps.remove(i + 1);
310                    steps.remove(i);
311                    changed = true;
312                    // Don't advance — check the new adjacent pair.
313                } else {
314                    i += 1;
315                }
316            }
317        }
318
319        // If everything cancelled, produce identity.
320        if steps.is_empty() {
321            steps.push(ConvertStep::Identity);
322        }
323
324        // Remove leading/trailing Identity if there are real steps.
325        if steps.len() > 1 {
326            steps.retain(|s| *s != ConvertStep::Identity);
327            if steps.is_empty() {
328                steps.push(ConvertStep::Identity);
329            }
330        }
331
332        Some(Self {
333            from: self.from,
334            to: other.to,
335            steps,
336        })
337    }
338
339    /// True if conversion is a no-op.
340    #[must_use]
341    pub fn is_identity(&self) -> bool {
342        self.steps.len() == 1 && self.steps[0] == ConvertStep::Identity
343    }
344
345    /// Maximum bytes-per-pixel across all intermediate formats in the plan.
346    ///
347    /// Used to pre-allocate scratch buffers for streaming conversion.
348    pub(crate) fn max_intermediate_bpp(&self) -> usize {
349        let mut desc = self.from;
350        let mut max_bpp = desc.bytes_per_pixel();
351        for &step in &self.steps {
352            desc = intermediate_desc(desc, step);
353            max_bpp = max_bpp.max(desc.bytes_per_pixel());
354        }
355        max_bpp
356    }
357
358    /// Source descriptor.
359    pub fn from(&self) -> PixelDescriptor {
360        self.from
361    }
362
363    /// Target descriptor.
364    pub fn to(&self) -> PixelDescriptor {
365        self.to
366    }
367}
368
369/// Determine the layout conversion step(s).
370///
371/// Some layout conversions require two steps (e.g., BGRA -> RGB needs
372/// swizzle + drop alpha). Returns up to 2 steps.
373fn layout_steps(from: ChannelLayout, to: ChannelLayout) -> Vec<ConvertStep> {
374    if from == to {
375        return Vec::new();
376    }
377    match (from, to) {
378        (ChannelLayout::Bgra, ChannelLayout::Rgba) | (ChannelLayout::Rgba, ChannelLayout::Bgra) => {
379            vec![ConvertStep::SwizzleBgraRgba]
380        }
381        (ChannelLayout::Rgb, ChannelLayout::Rgba) => vec![ConvertStep::AddAlpha],
382        (ChannelLayout::Rgb, ChannelLayout::Bgra) => {
383            // Rgb -> RGBA -> BGRA: add alpha then swizzle.
384            vec![ConvertStep::AddAlpha, ConvertStep::SwizzleBgraRgba]
385        }
386        (ChannelLayout::Rgba, ChannelLayout::Rgb) => vec![ConvertStep::DropAlpha],
387        (ChannelLayout::Bgra, ChannelLayout::Rgb) => {
388            // BGRA -> RGBA -> RGB: swizzle then drop alpha.
389            vec![ConvertStep::SwizzleBgraRgba, ConvertStep::DropAlpha]
390        }
391        (ChannelLayout::Gray, ChannelLayout::Rgb) => vec![ConvertStep::GrayToRgb],
392        (ChannelLayout::Gray, ChannelLayout::Rgba) => vec![ConvertStep::GrayToRgba],
393        (ChannelLayout::Gray, ChannelLayout::Bgra) => {
394            // Gray -> RGBA -> BGRA: expand then swizzle.
395            vec![ConvertStep::GrayToRgba, ConvertStep::SwizzleBgraRgba]
396        }
397        (ChannelLayout::Rgb, ChannelLayout::Gray) => vec![ConvertStep::RgbToGray],
398        (ChannelLayout::Rgba, ChannelLayout::Gray) => vec![ConvertStep::RgbaToGray],
399        (ChannelLayout::Bgra, ChannelLayout::Gray) => {
400            // BGRA -> RGBA -> Gray: swizzle then to gray.
401            vec![ConvertStep::SwizzleBgraRgba, ConvertStep::RgbaToGray]
402        }
403        (ChannelLayout::GrayAlpha, ChannelLayout::Rgba) => vec![ConvertStep::GrayAlphaToRgba],
404        (ChannelLayout::GrayAlpha, ChannelLayout::Bgra) => {
405            // GrayAlpha -> RGBA -> BGRA: expand then swizzle.
406            vec![ConvertStep::GrayAlphaToRgba, ConvertStep::SwizzleBgraRgba]
407        }
408        (ChannelLayout::GrayAlpha, ChannelLayout::Rgb) => vec![ConvertStep::GrayAlphaToRgb],
409        (ChannelLayout::Gray, ChannelLayout::GrayAlpha) => vec![ConvertStep::GrayToGrayAlpha],
410        (ChannelLayout::GrayAlpha, ChannelLayout::Gray) => vec![ConvertStep::GrayAlphaToGray],
411
412        // Oklab ↔ RGB conversions (via linear RGB).
413        (ChannelLayout::Rgb, ChannelLayout::Oklab) => vec![ConvertStep::LinearRgbToOklab],
414        (ChannelLayout::Oklab, ChannelLayout::Rgb) => vec![ConvertStep::OklabToLinearRgb],
415        (ChannelLayout::Rgba, ChannelLayout::OklabA) => vec![ConvertStep::LinearRgbaToOklaba],
416        (ChannelLayout::OklabA, ChannelLayout::Rgba) => vec![ConvertStep::OklabaToLinearRgba],
417
418        // Oklab ↔ RGB with alpha add/drop.
419        (ChannelLayout::Rgb, ChannelLayout::OklabA) => {
420            vec![ConvertStep::AddAlpha, ConvertStep::LinearRgbaToOklaba]
421        }
422        (ChannelLayout::OklabA, ChannelLayout::Rgb) => {
423            vec![ConvertStep::OklabaToLinearRgba, ConvertStep::DropAlpha]
424        }
425        (ChannelLayout::Oklab, ChannelLayout::Rgba) => {
426            vec![ConvertStep::OklabToLinearRgb, ConvertStep::AddAlpha]
427        }
428        (ChannelLayout::Rgba, ChannelLayout::Oklab) => {
429            vec![ConvertStep::DropAlpha, ConvertStep::LinearRgbToOklab]
430        }
431
432        // Oklab ↔ BGRA (swizzle to/from RGBA, then Oklab).
433        (ChannelLayout::Bgra, ChannelLayout::OklabA) => {
434            vec![
435                ConvertStep::SwizzleBgraRgba,
436                ConvertStep::LinearRgbaToOklaba,
437            ]
438        }
439        (ChannelLayout::OklabA, ChannelLayout::Bgra) => {
440            vec![
441                ConvertStep::OklabaToLinearRgba,
442                ConvertStep::SwizzleBgraRgba,
443            ]
444        }
445        (ChannelLayout::Bgra, ChannelLayout::Oklab) => {
446            vec![
447                ConvertStep::SwizzleBgraRgba,
448                ConvertStep::DropAlpha,
449                ConvertStep::LinearRgbToOklab,
450            ]
451        }
452        (ChannelLayout::Oklab, ChannelLayout::Bgra) => {
453            vec![
454                ConvertStep::OklabToLinearRgb,
455                ConvertStep::AddAlpha,
456                ConvertStep::SwizzleBgraRgba,
457            ]
458        }
459
460        // Gray ↔ Oklab (expand gray to RGB first).
461        (ChannelLayout::Gray, ChannelLayout::Oklab) => {
462            vec![ConvertStep::GrayToRgb, ConvertStep::LinearRgbToOklab]
463        }
464        (ChannelLayout::Oklab, ChannelLayout::Gray) => {
465            vec![ConvertStep::OklabToLinearRgb, ConvertStep::RgbToGray]
466        }
467        (ChannelLayout::Gray, ChannelLayout::OklabA) => {
468            vec![ConvertStep::GrayToRgba, ConvertStep::LinearRgbaToOklaba]
469        }
470        (ChannelLayout::OklabA, ChannelLayout::Gray) => {
471            vec![ConvertStep::OklabaToLinearRgba, ConvertStep::RgbaToGray]
472        }
473        (ChannelLayout::GrayAlpha, ChannelLayout::OklabA) => {
474            vec![
475                ConvertStep::GrayAlphaToRgba,
476                ConvertStep::LinearRgbaToOklaba,
477            ]
478        }
479        (ChannelLayout::OklabA, ChannelLayout::GrayAlpha) => {
480            // Drop alpha from OklabA→Oklab, convert to RGB, then to GrayAlpha.
481            // Alpha is lost; this is inherently lossy.
482            vec![
483                ConvertStep::OklabaToLinearRgba,
484                ConvertStep::RgbaToGray,
485                ConvertStep::GrayToGrayAlpha,
486            ]
487        }
488        (ChannelLayout::GrayAlpha, ChannelLayout::Oklab) => {
489            vec![ConvertStep::GrayAlphaToRgb, ConvertStep::LinearRgbToOklab]
490        }
491        (ChannelLayout::Oklab, ChannelLayout::GrayAlpha) => {
492            vec![
493                ConvertStep::OklabToLinearRgb,
494                ConvertStep::RgbToGray,
495                ConvertStep::GrayToGrayAlpha,
496            ]
497        }
498
499        // Oklab ↔ alpha variants.
500        (ChannelLayout::Oklab, ChannelLayout::OklabA) => vec![ConvertStep::AddAlpha],
501        (ChannelLayout::OklabA, ChannelLayout::Oklab) => vec![ConvertStep::DropAlpha],
502
503        _ => Vec::new(), // Unsupported layout conversion.
504    }
505}
506
507/// Determine the depth conversion step(s), considering transfer functions.
508///
509/// Returns one or two steps. Two steps are needed when the conversion
510/// requires going through an intermediate format (e.g. PQ U16 → sRGB U8
511/// goes PQ U16 → Linear F32 → sRGB U8).
512fn depth_steps(
513    from: ChannelType,
514    to: ChannelType,
515    from_tf: TransferFunction,
516    to_tf: TransferFunction,
517) -> Result<Vec<ConvertStep>, ConvertError> {
518    if from == to && from_tf == to_tf {
519        return Ok(Vec::new());
520    }
521
522    // Same depth, different transfer function.
523    // For integer types, TF changes are metadata-only (no math).
524    // For F32, we can apply EOTF/OETF in place.
525    if from == to && from != ChannelType::F32 {
526        return Ok(Vec::new());
527    }
528
529    if from == to && from == ChannelType::F32 {
530        return match (from_tf, to_tf) {
531            (TransferFunction::Pq, TransferFunction::Linear) => {
532                Ok(vec![ConvertStep::PqF32ToLinearF32])
533            }
534            (TransferFunction::Linear, TransferFunction::Pq) => {
535                Ok(vec![ConvertStep::LinearF32ToPqF32])
536            }
537            (TransferFunction::Hlg, TransferFunction::Linear) => {
538                Ok(vec![ConvertStep::HlgF32ToLinearF32])
539            }
540            (TransferFunction::Linear, TransferFunction::Hlg) => {
541                Ok(vec![ConvertStep::LinearF32ToHlgF32])
542            }
543            // PQ ↔ HLG: go through linear.
544            (TransferFunction::Pq, TransferFunction::Hlg) => Ok(vec![
545                ConvertStep::PqF32ToLinearF32,
546                ConvertStep::LinearF32ToHlgF32,
547            ]),
548            (TransferFunction::Hlg, TransferFunction::Pq) => Ok(vec![
549                ConvertStep::HlgF32ToLinearF32,
550                ConvertStep::LinearF32ToPqF32,
551            ]),
552            (TransferFunction::Srgb, TransferFunction::Linear) => {
553                Ok(vec![ConvertStep::SrgbF32ToLinearF32])
554            }
555            (TransferFunction::Linear, TransferFunction::Srgb) => {
556                Ok(vec![ConvertStep::LinearF32ToSrgbF32])
557            }
558            (TransferFunction::Bt709, TransferFunction::Linear) => {
559                Ok(vec![ConvertStep::Bt709F32ToLinearF32])
560            }
561            (TransferFunction::Linear, TransferFunction::Bt709) => {
562                Ok(vec![ConvertStep::LinearF32ToBt709F32])
563            }
564            // sRGB ↔ BT.709: go through linear.
565            (TransferFunction::Srgb, TransferFunction::Bt709) => Ok(vec![
566                ConvertStep::SrgbF32ToLinearF32,
567                ConvertStep::LinearF32ToBt709F32,
568            ]),
569            (TransferFunction::Bt709, TransferFunction::Srgb) => Ok(vec![
570                ConvertStep::Bt709F32ToLinearF32,
571                ConvertStep::LinearF32ToSrgbF32,
572            ]),
573            // sRGB/BT.709 ↔ PQ/HLG: go through linear.
574            (TransferFunction::Srgb, TransferFunction::Pq) => Ok(vec![
575                ConvertStep::SrgbF32ToLinearF32,
576                ConvertStep::LinearF32ToPqF32,
577            ]),
578            (TransferFunction::Srgb, TransferFunction::Hlg) => Ok(vec![
579                ConvertStep::SrgbF32ToLinearF32,
580                ConvertStep::LinearF32ToHlgF32,
581            ]),
582            (TransferFunction::Pq, TransferFunction::Srgb) => Ok(vec![
583                ConvertStep::PqF32ToLinearF32,
584                ConvertStep::LinearF32ToSrgbF32,
585            ]),
586            (TransferFunction::Hlg, TransferFunction::Srgb) => Ok(vec![
587                ConvertStep::HlgF32ToLinearF32,
588                ConvertStep::LinearF32ToSrgbF32,
589            ]),
590            (TransferFunction::Bt709, TransferFunction::Pq) => Ok(vec![
591                ConvertStep::Bt709F32ToLinearF32,
592                ConvertStep::LinearF32ToPqF32,
593            ]),
594            (TransferFunction::Bt709, TransferFunction::Hlg) => Ok(vec![
595                ConvertStep::Bt709F32ToLinearF32,
596                ConvertStep::LinearF32ToHlgF32,
597            ]),
598            (TransferFunction::Pq, TransferFunction::Bt709) => Ok(vec![
599                ConvertStep::PqF32ToLinearF32,
600                ConvertStep::LinearF32ToBt709F32,
601            ]),
602            (TransferFunction::Hlg, TransferFunction::Bt709) => Ok(vec![
603                ConvertStep::HlgF32ToLinearF32,
604                ConvertStep::LinearF32ToBt709F32,
605            ]),
606            _ => Ok(Vec::new()),
607        };
608    }
609
610    match (from, to) {
611        (ChannelType::U8, ChannelType::F32) => {
612            if (from_tf == TransferFunction::Srgb || from_tf == TransferFunction::Bt709)
613                && to_tf == TransferFunction::Linear
614            {
615                Ok(vec![ConvertStep::SrgbU8ToLinearF32])
616            } else {
617                Ok(vec![ConvertStep::NaiveU8ToF32])
618            }
619        }
620        (ChannelType::F32, ChannelType::U8) => {
621            if from_tf == TransferFunction::Linear
622                && (to_tf == TransferFunction::Srgb || to_tf == TransferFunction::Bt709)
623            {
624                Ok(vec![ConvertStep::LinearF32ToSrgbU8])
625            } else {
626                Ok(vec![ConvertStep::NaiveF32ToU8])
627            }
628        }
629        (ChannelType::U16, ChannelType::F32) => {
630            // PQ/HLG U16 → Linear F32: apply EOTF during conversion.
631            match (from_tf, to_tf) {
632                (TransferFunction::Pq, TransferFunction::Linear) => {
633                    Ok(vec![ConvertStep::PqU16ToLinearF32])
634                }
635                (TransferFunction::Hlg, TransferFunction::Linear) => {
636                    Ok(vec![ConvertStep::HlgU16ToLinearF32])
637                }
638                _ => Ok(vec![ConvertStep::U16ToF32]),
639            }
640        }
641        (ChannelType::F32, ChannelType::U16) => {
642            // Linear F32 → PQ/HLG U16: apply OETF during conversion.
643            match (from_tf, to_tf) {
644                (TransferFunction::Linear, TransferFunction::Pq) => {
645                    Ok(vec![ConvertStep::LinearF32ToPqU16])
646                }
647                (TransferFunction::Linear, TransferFunction::Hlg) => {
648                    Ok(vec![ConvertStep::LinearF32ToHlgU16])
649                }
650                _ => Ok(vec![ConvertStep::F32ToU16]),
651            }
652        }
653        (ChannelType::U16, ChannelType::U8) => {
654            // HDR U16 → SDR U8: go through linear F32 with proper EOTF → OETF.
655            if from_tf == TransferFunction::Pq && to_tf == TransferFunction::Srgb {
656                Ok(vec![
657                    ConvertStep::PqU16ToLinearF32,
658                    ConvertStep::LinearF32ToSrgbU8,
659                ])
660            } else if from_tf == TransferFunction::Hlg && to_tf == TransferFunction::Srgb {
661                Ok(vec![
662                    ConvertStep::HlgU16ToLinearF32,
663                    ConvertStep::LinearF32ToSrgbU8,
664                ])
665            } else {
666                Ok(vec![ConvertStep::U16ToU8])
667            }
668        }
669        (ChannelType::U8, ChannelType::U16) => Ok(vec![ConvertStep::U8ToU16]),
670        _ => Err(ConvertError::NoPath {
671            from: PixelDescriptor::new(from, ChannelLayout::Rgb, None, from_tf),
672            to: PixelDescriptor::new(to, ChannelLayout::Rgb, None, to_tf),
673        }),
674    }
675}
676
677// ---------------------------------------------------------------------------
678// Row conversion kernels
679// ---------------------------------------------------------------------------
680
/// Reusable working memory for multi-step row conversions.
///
/// Holds one allocation that is split into two ping-pong halves on every
/// call, so streaming loops do not allocate per row. Create one per
/// [`ConvertPlan`] and hand it to `convert_row_buffered` for each row.
pub(crate) struct ConvertScratch {
    /// Backing storage, split in two with `split_at_mut` when used.
    /// Kept as `Vec<u32>` so the bytes are 4-byte aligned, which lets
    /// garb and bytemuck take aligned fast paths instead of unaligned fallbacks.
    buf: Vec<u32>,
}
692
693impl ConvertScratch {
694    /// Create empty scratch (buffer grows on first use).
695    pub(crate) fn new() -> Self {
696        Self { buf: Vec::new() }
697    }
698
699    /// Ensure the buffer is large enough for two halves of the max
700    /// intermediate format at the given width.
701    fn ensure_capacity(&mut self, plan: &ConvertPlan, width: u32) {
702        let half_bytes = (width as usize) * plan.max_intermediate_bpp();
703        let total_u32 = (half_bytes * 2).div_ceil(4);
704        if self.buf.len() < total_u32 {
705            self.buf.resize(total_u32, 0);
706        }
707    }
708}
709
710impl core::fmt::Debug for ConvertScratch {
711    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
712        f.debug_struct("ConvertScratch")
713            .field("capacity", &self.buf.capacity())
714            .finish()
715    }
716}
717
718/// Convert one row of `width` pixels using a pre-computed plan.
719///
720/// `src` and `dst` must be sized for `width` pixels in their respective formats.
721/// For multi-step plans, an internal scratch buffer is allocated per call.
722/// Prefer [`RowConverter`](crate::RowConverter) in hot loops (reuses scratch buffers).
723pub fn convert_row(plan: &ConvertPlan, src: &[u8], dst: &mut [u8], width: u32) {
724    if plan.is_identity() {
725        let len = min(src.len(), dst.len());
726        dst[..len].copy_from_slice(&src[..len]);
727        return;
728    }
729
730    if plan.steps.len() == 1 {
731        apply_step_u8(plan.steps[0], src, dst, width, plan.from, plan.to);
732        return;
733    }
734
735    // Allocating fallback for one-off calls.
736    let mut scratch = ConvertScratch::new();
737    convert_row_buffered(plan, src, dst, width, &mut scratch);
738}
739
740/// Convert one row of `width` pixels, reusing pre-allocated scratch buffers.
741///
742/// For multi-step plans this avoids per-row heap allocation by ping-ponging
743/// between two halves of a scratch buffer. Single-step plans bypass scratch.
744pub(crate) fn convert_row_buffered(
745    plan: &ConvertPlan,
746    src: &[u8],
747    dst: &mut [u8],
748    width: u32,
749    scratch: &mut ConvertScratch,
750) {
751    if plan.is_identity() {
752        let len = min(src.len(), dst.len());
753        dst[..len].copy_from_slice(&src[..len]);
754        return;
755    }
756
757    if plan.steps.len() == 1 {
758        apply_step_u8(plan.steps[0], src, dst, width, plan.from, plan.to);
759        return;
760    }
761
762    scratch.ensure_capacity(plan, width);
763
764    let buf_bytes: &mut [u8] = bytemuck::cast_slice_mut(&mut scratch.buf);
765    let half = buf_bytes.len() / 2;
766    let (buf_a, buf_b) = buf_bytes.split_at_mut(half);
767
768    let num_steps = plan.steps.len();
769    let mut current_desc = plan.from;
770
771    for (i, &step) in plan.steps.iter().enumerate() {
772        let is_last = i == num_steps - 1;
773        let next_desc = if is_last {
774            plan.to
775        } else {
776            intermediate_desc(current_desc, step)
777        };
778
779        let next_len = (width as usize) * next_desc.bytes_per_pixel();
780        let curr_len = (width as usize) * current_desc.bytes_per_pixel();
781
782        // Ping-pong: even steps read src/buf_b and write buf_a;
783        // odd steps read buf_a and write buf_b. Each branch only
784        // borrows each half in one mode, satisfying the borrow checker.
785        if i % 2 == 0 {
786            let input = if i == 0 { src } else { &buf_b[..curr_len] };
787            if is_last {
788                apply_step_u8(step, input, dst, width, current_desc, next_desc);
789            } else {
790                apply_step_u8(
791                    step,
792                    input,
793                    &mut buf_a[..next_len],
794                    width,
795                    current_desc,
796                    next_desc,
797                );
798            }
799        } else {
800            let input = &buf_a[..curr_len];
801            if is_last {
802                apply_step_u8(step, input, dst, width, current_desc, next_desc);
803            } else {
804                apply_step_u8(
805                    step,
806                    input,
807                    &mut buf_b[..next_len],
808                    width,
809                    current_desc,
810                    next_desc,
811                );
812            }
813        }
814
815        current_desc = next_desc;
816    }
817}
818
819/// Check if two steps are inverses that cancel each other.
820fn are_inverse(a: ConvertStep, b: ConvertStep) -> bool {
821    matches!(
822        (a, b),
823        // Self-inverse
824        (ConvertStep::SwizzleBgraRgba, ConvertStep::SwizzleBgraRgba)
825        // Layout inverses (lossless for opaque data)
826        | (ConvertStep::AddAlpha, ConvertStep::DropAlpha)
827        // Transfer function f32↔f32 (exact inverses in float)
828        | (ConvertStep::SrgbF32ToLinearF32, ConvertStep::LinearF32ToSrgbF32)
829        | (ConvertStep::LinearF32ToSrgbF32, ConvertStep::SrgbF32ToLinearF32)
830        | (ConvertStep::PqF32ToLinearF32, ConvertStep::LinearF32ToPqF32)
831        | (ConvertStep::LinearF32ToPqF32, ConvertStep::PqF32ToLinearF32)
832        | (ConvertStep::HlgF32ToLinearF32, ConvertStep::LinearF32ToHlgF32)
833        | (ConvertStep::LinearF32ToHlgF32, ConvertStep::HlgF32ToLinearF32)
834        | (ConvertStep::Bt709F32ToLinearF32, ConvertStep::LinearF32ToBt709F32)
835        | (ConvertStep::LinearF32ToBt709F32, ConvertStep::Bt709F32ToLinearF32)
836        // Alpha mode (exact inverses in float)
837        | (ConvertStep::StraightToPremul, ConvertStep::PremulToStraight)
838        | (ConvertStep::PremulToStraight, ConvertStep::StraightToPremul)
839        // Color model (exact inverses in float)
840        | (ConvertStep::LinearRgbToOklab, ConvertStep::OklabToLinearRgb)
841        | (ConvertStep::OklabToLinearRgb, ConvertStep::LinearRgbToOklab)
842        | (ConvertStep::LinearRgbaToOklaba, ConvertStep::OklabaToLinearRgba)
843        | (ConvertStep::OklabaToLinearRgba, ConvertStep::LinearRgbaToOklaba)
844        // Cross-depth pairs (near-lossless for same depth class)
845        | (ConvertStep::NaiveU8ToF32, ConvertStep::NaiveF32ToU8)
846        | (ConvertStep::NaiveF32ToU8, ConvertStep::NaiveU8ToF32)
847        | (ConvertStep::U8ToU16, ConvertStep::U16ToU8)
848        | (ConvertStep::U16ToU8, ConvertStep::U8ToU16)
849        | (ConvertStep::U16ToF32, ConvertStep::F32ToU16)
850        | (ConvertStep::F32ToU16, ConvertStep::U16ToF32)
851        // Cross-depth with transfer (near-lossless roundtrip)
852        | (ConvertStep::SrgbU8ToLinearF32, ConvertStep::LinearF32ToSrgbU8)
853        | (ConvertStep::LinearF32ToSrgbU8, ConvertStep::SrgbU8ToLinearF32)
854        | (ConvertStep::PqU16ToLinearF32, ConvertStep::LinearF32ToPqU16)
855        | (ConvertStep::LinearF32ToPqU16, ConvertStep::PqU16ToLinearF32)
856        | (ConvertStep::HlgU16ToLinearF32, ConvertStep::LinearF32ToHlgU16)
857        | (ConvertStep::LinearF32ToHlgU16, ConvertStep::HlgU16ToLinearF32)
858    )
859}
860
861/// Compute the descriptor after applying one step.
862fn intermediate_desc(current: PixelDescriptor, step: ConvertStep) -> PixelDescriptor {
863    match step {
864        ConvertStep::Identity => current,
865        ConvertStep::SwizzleBgraRgba => {
866            let new_layout = match current.layout() {
867                ChannelLayout::Bgra => ChannelLayout::Rgba,
868                ChannelLayout::Rgba => ChannelLayout::Bgra,
869                other => other,
870            };
871            PixelDescriptor::new(
872                current.channel_type(),
873                new_layout,
874                current.alpha(),
875                current.transfer(),
876            )
877        }
878        ConvertStep::AddAlpha => PixelDescriptor::new(
879            current.channel_type(),
880            ChannelLayout::Rgba,
881            Some(AlphaMode::Straight),
882            current.transfer(),
883        ),
884        ConvertStep::DropAlpha | ConvertStep::MatteComposite { .. } => PixelDescriptor::new(
885            current.channel_type(),
886            ChannelLayout::Rgb,
887            None,
888            current.transfer(),
889        ),
890        ConvertStep::GrayToRgb => PixelDescriptor::new(
891            current.channel_type(),
892            ChannelLayout::Rgb,
893            None,
894            current.transfer(),
895        ),
896        ConvertStep::GrayToRgba => PixelDescriptor::new(
897            current.channel_type(),
898            ChannelLayout::Rgba,
899            Some(AlphaMode::Straight),
900            current.transfer(),
901        ),
902        ConvertStep::RgbToGray | ConvertStep::RgbaToGray => PixelDescriptor::new(
903            current.channel_type(),
904            ChannelLayout::Gray,
905            None,
906            current.transfer(),
907        ),
908        ConvertStep::GrayAlphaToRgba => PixelDescriptor::new(
909            current.channel_type(),
910            ChannelLayout::Rgba,
911            current.alpha(),
912            current.transfer(),
913        ),
914        ConvertStep::GrayAlphaToRgb => PixelDescriptor::new(
915            current.channel_type(),
916            ChannelLayout::Rgb,
917            None,
918            current.transfer(),
919        ),
920        ConvertStep::GrayToGrayAlpha => PixelDescriptor::new(
921            current.channel_type(),
922            ChannelLayout::GrayAlpha,
923            Some(AlphaMode::Straight),
924            current.transfer(),
925        ),
926        ConvertStep::GrayAlphaToGray => PixelDescriptor::new(
927            current.channel_type(),
928            ChannelLayout::Gray,
929            None,
930            current.transfer(),
931        ),
932        ConvertStep::SrgbU8ToLinearF32
933        | ConvertStep::NaiveU8ToF32
934        | ConvertStep::U16ToF32
935        | ConvertStep::PqU16ToLinearF32
936        | ConvertStep::HlgU16ToLinearF32
937        | ConvertStep::PqF32ToLinearF32
938        | ConvertStep::HlgF32ToLinearF32
939        | ConvertStep::SrgbF32ToLinearF32
940        | ConvertStep::Bt709F32ToLinearF32 => PixelDescriptor::new(
941            ChannelType::F32,
942            current.layout(),
943            current.alpha(),
944            TransferFunction::Linear,
945        ),
946        ConvertStep::LinearF32ToSrgbU8 | ConvertStep::NaiveF32ToU8 | ConvertStep::U16ToU8 => {
947            PixelDescriptor::new(
948                ChannelType::U8,
949                current.layout(),
950                current.alpha(),
951                TransferFunction::Srgb,
952            )
953        }
954        ConvertStep::U8ToU16 => PixelDescriptor::new(
955            ChannelType::U16,
956            current.layout(),
957            current.alpha(),
958            current.transfer(),
959        ),
960        ConvertStep::F32ToU16 | ConvertStep::LinearF32ToPqU16 | ConvertStep::LinearF32ToHlgU16 => {
961            let tf = match step {
962                ConvertStep::LinearF32ToPqU16 => TransferFunction::Pq,
963                ConvertStep::LinearF32ToHlgU16 => TransferFunction::Hlg,
964                _ => current.transfer(),
965            };
966            PixelDescriptor::new(ChannelType::U16, current.layout(), current.alpha(), tf)
967        }
968        ConvertStep::LinearF32ToPqF32 => PixelDescriptor::new(
969            ChannelType::F32,
970            current.layout(),
971            current.alpha(),
972            TransferFunction::Pq,
973        ),
974        ConvertStep::LinearF32ToHlgF32 => PixelDescriptor::new(
975            ChannelType::F32,
976            current.layout(),
977            current.alpha(),
978            TransferFunction::Hlg,
979        ),
980        ConvertStep::LinearF32ToSrgbF32 => PixelDescriptor::new(
981            ChannelType::F32,
982            current.layout(),
983            current.alpha(),
984            TransferFunction::Srgb,
985        ),
986        ConvertStep::LinearF32ToBt709F32 => PixelDescriptor::new(
987            ChannelType::F32,
988            current.layout(),
989            current.alpha(),
990            TransferFunction::Bt709,
991        ),
992        ConvertStep::StraightToPremul => PixelDescriptor::new(
993            current.channel_type(),
994            current.layout(),
995            Some(AlphaMode::Premultiplied),
996            current.transfer(),
997        ),
998        ConvertStep::PremulToStraight => PixelDescriptor::new(
999            current.channel_type(),
1000            current.layout(),
1001            Some(AlphaMode::Straight),
1002            current.transfer(),
1003        ),
1004        ConvertStep::LinearRgbToOklab => PixelDescriptor::new(
1005            ChannelType::F32,
1006            ChannelLayout::Oklab,
1007            None,
1008            TransferFunction::Unknown,
1009        )
1010        .with_primaries(current.primaries),
1011        ConvertStep::OklabToLinearRgb => PixelDescriptor::new(
1012            ChannelType::F32,
1013            ChannelLayout::Rgb,
1014            None,
1015            TransferFunction::Linear,
1016        )
1017        .with_primaries(current.primaries),
1018        ConvertStep::LinearRgbaToOklaba => PixelDescriptor::new(
1019            ChannelType::F32,
1020            ChannelLayout::OklabA,
1021            Some(AlphaMode::Straight),
1022            TransferFunction::Unknown,
1023        )
1024        .with_primaries(current.primaries),
1025        ConvertStep::OklabaToLinearRgba => PixelDescriptor::new(
1026            ChannelType::F32,
1027            ChannelLayout::Rgba,
1028            current.alpha(),
1029            TransferFunction::Linear,
1030        )
1031        .with_primaries(current.primaries),
1032    }
1033}
1034
1035#[path = "convert_kernels.rs"]
1036mod convert_kernels;
1037use convert_kernels::apply_step_u8;
1038pub(crate) use convert_kernels::{hlg_eotf, hlg_oetf, pq_eotf, pq_oetf};
zenpixels_convert/convert.rs

zenpixels_convert/
convert.rs