Skip to main content

zenpixels_convert/
convert.rs

1//! Row-level pixel conversion kernels.
2//!
3//! Each kernel converts one row of `width` pixels from a source format to
4//! a destination format. Individual step kernels are pure functions with
5//! no allocation. Multi-step plans use [`ConvertScratch`] ping-pong
6//! buffers to avoid per-row heap allocation in streaming loops.
7
8use alloc::vec;
9use alloc::vec::Vec;
10use core::cmp::min;
11
12use crate::policy::{AlphaPolicy, ConvertOptions, DepthPolicy, LumaCoefficients};
13use crate::{
14    AlphaMode, ChannelLayout, ChannelType, ColorPrimaries, ConvertError, PixelDescriptor,
15    TransferFunction,
16};
17use whereat::{At, ResultAtExt};
18
/// Pre-computed conversion plan.
///
/// Stores the chain of steps needed to convert from one format to another.
/// Created once, applied to every row.
#[derive(Clone, Debug)]
pub struct ConvertPlan {
    /// Source pixel format the plan expects as row input.
    pub(crate) from: PixelDescriptor,
    /// Destination pixel format the plan produces.
    pub(crate) to: PixelDescriptor,
    /// Ordered conversion steps. Never empty: identity conversions are
    /// planned as `[ConvertStep::Identity]` (see `ConvertPlan::new`).
    pub(crate) steps: Vec<ConvertStep>,
}
29
/// A single conversion step.
///
/// Not `Copy`. Peephole rewrites must use `.clone()` or index assignment
/// with pattern matching instead of `*step` dereferences.
///
/// NOTE(review): this doc previously referenced an `Arc`-carrying
/// `ExternalTransform` variant, which is not present in this definition
/// (the intra-doc link would not resolve) — TODO confirm whether that
/// variant was removed or lives behind a feature gate elsewhere.
///
/// `Debug` is implemented manually below so unit variants print as bare
/// names and matrix-carrying variants print their flattened matrix.
#[derive(Clone)]
pub(crate) enum ConvertStep {
    /// No-op (identity).
    Identity,
    /// BGRA → RGBA byte swizzle (or vice versa).
    SwizzleBgraRgba,
    /// Fused RGB → BGRA: byte swap + add opaque alpha in a single SIMD pass.
    /// Equivalent to `[AddAlpha, SwizzleBgraRgba]` but writes the destination
    /// once instead of twice.
    RgbToBgra,
    /// Add alpha channel (3ch → 4ch), filling with opaque.
    AddAlpha,
    /// Drop alpha channel (4ch → 3ch).
    DropAlpha,
    /// Composite onto solid matte color, then drop alpha (4ch → 3ch).
    ///
    /// Blends in linear light using the source descriptor's transfer
    /// function: pixel RGB is EOTF'd per source TF, alpha-blended against
    /// the pre-linearized matte, then OETF'd back to source TF. Alpha is
    /// treated as linear regardless of color-channel TF. The matte
    /// `(r, g, b)` is always interpreted as sRGB u8 (CSS-style background).
    ///
    /// Implemented uniformly across U8/U16/F32/F16 via per-TF
    /// monomorphization; sRGB integer paths use LUT-based EOTF/OETF.
    MatteComposite { r: u8, g: u8, b: u8 },
    /// Gray → RGB (replicate gray to all 3 channels).
    GrayToRgb,
    /// Gray → RGBA (replicate + opaque alpha).
    GrayToRgba,
    /// RGB → Gray (Y' encoded luma — coefficients applied to encoded bytes).
    ///
    /// The semantic is BT.709/BT.601/etc. Y' (encoded luma), NOT linear-light
    /// luminance L. This is fast, exactly round-trips for `R==G==B` inputs,
    /// and matches what JPEG/video pipelines compute. Linear-light luminance
    /// would require linearize → weight → encode and is not currently
    /// surfaced; document any future linear-L pathway as a separate variant.
    ///
    /// Coefficients are resolved from `ConvertOptions::luma` at plan build
    /// time (`new_explicit`). Default for plans built via `Self::new`
    /// without options is `LumaCoefficients::Bt709`.
    RgbToGray { coefficients: LumaCoefficients },
    /// RGBA → Gray, drop alpha. See [`RgbToGray`](Self::RgbToGray) for
    /// semantic and coefficient resolution.
    RgbaToGray { coefficients: LumaCoefficients },
    /// GrayAlpha → RGBA (replicate gray, keep alpha).
    GrayAlphaToRgba,
    /// GrayAlpha → RGB (replicate gray, drop alpha).
    GrayAlphaToRgb,
    /// Gray → GrayAlpha (add opaque alpha).
    GrayToGrayAlpha,
    /// GrayAlpha → Gray (drop alpha).
    GrayAlphaToGray,
    /// sRGB u8 → linear f32 (per channel, EOTF).
    SrgbU8ToLinearF32,
    /// Linear f32 → sRGB u8 (per channel, OETF).
    LinearF32ToSrgbU8,
    /// Naive u8 → f32 (v / 255.0, no gamma).
    NaiveU8ToF32,
    /// Naive f32 → u8 (clamp * 255 + 0.5, no gamma).
    NaiveF32ToU8,
    /// u16 → u8 ((v * 255 + 32768) >> 16).
    U16ToU8,
    /// u8 → u16 (v * 257).
    U8ToU16,
    /// u16 → f32 (v / 65535.0).
    U16ToF32,
    /// f32 → u16 (clamp * 65535 + 0.5).
    F32ToU16,
    /// f16 → f32 (IEEE 754 half-precision unpack, no TF).
    F16ToF32,
    /// f32 → f16 (round-to-nearest-even, no TF).
    F32ToF16,
    /// PQ (SMPTE ST 2084) u16 → linear f32 (EOTF).
    PqU16ToLinearF32,
    /// Linear f32 → PQ u16 (inverse EOTF / OETF).
    LinearF32ToPqU16,
    /// PQ f32 [0,1] → linear f32 (EOTF, no depth change).
    PqF32ToLinearF32,
    /// Linear f32 → PQ f32 [0,1] (OETF, no depth change).
    LinearF32ToPqF32,
    /// HLG (ARIB STD-B67) u16 → linear f32 (EOTF).
    HlgU16ToLinearF32,
    /// Linear f32 → HLG u16 (OETF).
    LinearF32ToHlgU16,
    /// HLG f32 [0,1] → linear f32 (EOTF, no depth change).
    HlgF32ToLinearF32,
    /// Linear f32 → HLG f32 [0,1] (OETF, no depth change).
    LinearF32ToHlgF32,
    /// sRGB f32 [0,1] → linear f32 (EOTF, no depth change). Clamps input.
    SrgbF32ToLinearF32,
    /// Linear f32 → sRGB f32 [0,1] (OETF, no depth change). Clamps output.
    LinearF32ToSrgbF32,
    /// sRGB f32 → linear f32 (EOTF, sign-preserving extended range).
    /// Emitted when `ConvertOptions::clip_out_of_gamut == false`.
    SrgbF32ToLinearF32Extended,
    /// Linear f32 → sRGB f32 (OETF, sign-preserving extended range).
    LinearF32ToSrgbF32Extended,
    /// BT.709 f32 [0,1] → linear f32 (EOTF, no depth change).
    Bt709F32ToLinearF32,
    /// Linear f32 → BT.709 f32 [0,1] (OETF, no depth change).
    LinearF32ToBt709F32,
    /// Gamma 2.2 (Adobe RGB 1998) f32 [0,1] → linear f32 (EOTF, no depth change).
    /// Uses the Adobe RGB 1998 canonical exponent 563/256 ≈ 2.19921875.
    Gamma22F32ToLinearF32,
    /// Linear f32 → Gamma 2.2 (Adobe RGB 1998) f32 [0,1] (OETF, no depth change).
    LinearF32ToGamma22F32,
    /// Straight → Premultiplied alpha.
    StraightToPremul,
    /// Premultiplied → Straight alpha.
    PremulToStraight,
    /// Linear RGB f32 → Oklab f32 (3-channel color model change).
    LinearRgbToOklab,
    /// Oklab f32 → Linear RGB f32 (3-channel color model change).
    OklabToLinearRgb,
    /// Linear RGBA f32 → Oklaba f32 (4-channel, alpha preserved).
    LinearRgbaToOklaba,
    /// Oklaba f32 → Linear RGBA f32 (4-channel, alpha preserved).
    OklabaToLinearRgba,
    /// Apply a 3×3 gamut matrix to linear RGB f32 (3 channels per pixel).
    ///
    /// Used for color primaries conversion (e.g., BT.709 ↔ Display P3 ↔ BT.2020).
    /// Data must be in linear light. The matrix is row-major `[[f32; 3]; 3]`
    /// flattened to `[f32; 9]`.
    GamutMatrixRgbF32([f32; 9]),
    /// Apply a 3×3 gamut matrix to linear RGBA f32 (4 channels, alpha passthrough).
    GamutMatrixRgbaF32([f32; 9]),
    /// Fused u8-sRGB RGB primaries conversion: LUT linearize → SIMD matrix →
    /// SIMD f32→i32 → LUT encode, in one pass. Replaces the 3-step sequence
    /// `[SrgbU8ToLinearF32, GamutMatrixRgbF32(m), LinearF32ToSrgbU8]`.
    FusedSrgbU8GamutRgb([f32; 9]),
    /// Fused u8-sRGB RGBA primaries conversion (alpha passthrough).
    FusedSrgbU8GamutRgba([f32; 9]),
    /// Fused u16-sRGB RGB primaries conversion via 65K-entry LUTs.
    FusedSrgbU16GamutRgb([f32; 9]),
    /// Fused u8-sRGB → linear-f32 RGB primaries conversion (cross-depth).
    /// Output preserves extended range (no clamp).
    FusedSrgbU8ToLinearF32Rgb([f32; 9]),
    /// Fused linear-f32 → u8-sRGB RGB primaries conversion (cross-depth).
    /// Always clamps since u8 can't represent out-of-gamut values.
    FusedLinearF32ToSrgbU8Rgb([f32; 9]),
}
178
179impl core::fmt::Debug for ConvertStep {
180    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
181        match self {
182            Self::Identity => f.write_str("Identity"),
183            Self::SwizzleBgraRgba => f.write_str("SwizzleBgraRgba"),
184            Self::RgbToBgra => f.write_str("RgbToBgra"),
185            Self::AddAlpha => f.write_str("AddAlpha"),
186            Self::DropAlpha => f.write_str("DropAlpha"),
187            Self::MatteComposite { r, g, b } => f
188                .debug_struct("MatteComposite")
189                .field("r", r)
190                .field("g", g)
191                .field("b", b)
192                .finish(),
193            Self::GrayToRgb => f.write_str("GrayToRgb"),
194            Self::GrayToRgba => f.write_str("GrayToRgba"),
195            Self::RgbToGray { coefficients } => f
196                .debug_struct("RgbToGray")
197                .field("coefficients", coefficients)
198                .finish(),
199            Self::RgbaToGray { coefficients } => f
200                .debug_struct("RgbaToGray")
201                .field("coefficients", coefficients)
202                .finish(),
203            Self::GrayAlphaToRgba => f.write_str("GrayAlphaToRgba"),
204            Self::GrayAlphaToRgb => f.write_str("GrayAlphaToRgb"),
205            Self::GrayToGrayAlpha => f.write_str("GrayToGrayAlpha"),
206            Self::GrayAlphaToGray => f.write_str("GrayAlphaToGray"),
207            Self::SrgbU8ToLinearF32 => f.write_str("SrgbU8ToLinearF32"),
208            Self::LinearF32ToSrgbU8 => f.write_str("LinearF32ToSrgbU8"),
209            Self::NaiveU8ToF32 => f.write_str("NaiveU8ToF32"),
210            Self::NaiveF32ToU8 => f.write_str("NaiveF32ToU8"),
211            Self::U16ToU8 => f.write_str("U16ToU8"),
212            Self::U8ToU16 => f.write_str("U8ToU16"),
213            Self::U16ToF32 => f.write_str("U16ToF32"),
214            Self::F32ToU16 => f.write_str("F32ToU16"),
215            Self::F16ToF32 => f.write_str("F16ToF32"),
216            Self::F32ToF16 => f.write_str("F32ToF16"),
217            Self::PqU16ToLinearF32 => f.write_str("PqU16ToLinearF32"),
218            Self::LinearF32ToPqU16 => f.write_str("LinearF32ToPqU16"),
219            Self::PqF32ToLinearF32 => f.write_str("PqF32ToLinearF32"),
220            Self::LinearF32ToPqF32 => f.write_str("LinearF32ToPqF32"),
221            Self::HlgU16ToLinearF32 => f.write_str("HlgU16ToLinearF32"),
222            Self::LinearF32ToHlgU16 => f.write_str("LinearF32ToHlgU16"),
223            Self::HlgF32ToLinearF32 => f.write_str("HlgF32ToLinearF32"),
224            Self::LinearF32ToHlgF32 => f.write_str("LinearF32ToHlgF32"),
225            Self::SrgbF32ToLinearF32 => f.write_str("SrgbF32ToLinearF32"),
226            Self::LinearF32ToSrgbF32 => f.write_str("LinearF32ToSrgbF32"),
227            Self::SrgbF32ToLinearF32Extended => f.write_str("SrgbF32ToLinearF32Extended"),
228            Self::LinearF32ToSrgbF32Extended => f.write_str("LinearF32ToSrgbF32Extended"),
229            Self::Bt709F32ToLinearF32 => f.write_str("Bt709F32ToLinearF32"),
230            Self::LinearF32ToBt709F32 => f.write_str("LinearF32ToBt709F32"),
231            Self::Gamma22F32ToLinearF32 => f.write_str("Gamma22F32ToLinearF32"),
232            Self::LinearF32ToGamma22F32 => f.write_str("LinearF32ToGamma22F32"),
233            Self::StraightToPremul => f.write_str("StraightToPremul"),
234            Self::PremulToStraight => f.write_str("PremulToStraight"),
235            Self::LinearRgbToOklab => f.write_str("LinearRgbToOklab"),
236            Self::OklabToLinearRgb => f.write_str("OklabToLinearRgb"),
237            Self::LinearRgbaToOklaba => f.write_str("LinearRgbaToOklaba"),
238            Self::OklabaToLinearRgba => f.write_str("OklabaToLinearRgba"),
239            Self::GamutMatrixRgbF32(m) => f.debug_tuple("GamutMatrixRgbF32").field(m).finish(),
240            Self::GamutMatrixRgbaF32(m) => f.debug_tuple("GamutMatrixRgbaF32").field(m).finish(),
241            Self::FusedSrgbU8GamutRgb(m) => f.debug_tuple("FusedSrgbU8GamutRgb").field(m).finish(),
242            Self::FusedSrgbU8GamutRgba(m) => {
243                f.debug_tuple("FusedSrgbU8GamutRgba").field(m).finish()
244            }
245            Self::FusedSrgbU16GamutRgb(m) => {
246                f.debug_tuple("FusedSrgbU16GamutRgb").field(m).finish()
247            }
248            Self::FusedSrgbU8ToLinearF32Rgb(m) => {
249                f.debug_tuple("FusedSrgbU8ToLinearF32Rgb").field(m).finish()
250            }
251            Self::FusedLinearF32ToSrgbU8Rgb(m) => {
252                f.debug_tuple("FusedLinearF32ToSrgbU8Rgb").field(m).finish()
253            }
254        }
255    }
256}
257
258/// Assert that a descriptor is not CMYK.
259///
260/// CMYK is device-dependent and cannot be converted by zenpixels-convert.
261/// Use a CMS (e.g., moxcms) with an ICC profile for CMYK↔RGB conversion.
262fn assert_not_cmyk(desc: &PixelDescriptor) {
263    assert!(
264        desc.color_model() != crate::ColorModel::Cmyk,
265        "CMYK pixel data cannot be processed by zenpixels-convert. \
266         Use a CMS (e.g., moxcms) with an ICC profile for CMYK↔RGB conversion."
267    );
268}
269
270impl ConvertPlan {
    /// Create a conversion plan from `from` to `to`.
    ///
    /// Returns `Err` if no conversion path exists.
    ///
    /// # Panics
    ///
    /// Panics if either `from` or `to` uses [`ColorModel::Cmyk`](zenpixels::ColorModel::Cmyk).
    /// CMYK requires a CMS with an ICC profile for conversion.
    #[track_caller]
    pub fn new(from: PixelDescriptor, to: PixelDescriptor) -> Result<Self, At<ConvertError>> {
        assert_not_cmyk(&from);
        assert_not_cmyk(&to);
        // Fast path: identical formats get a single Identity step, so a
        // plan's `steps` vector is never empty.
        if from == to {
            return Ok(Self {
                from,
                to,
                steps: vec![ConvertStep::Identity],
            });
        }

        let mut steps = Vec::with_capacity(3);

        // Step 1: Layout conversion (within same depth class).
        // Step 2: Depth conversion.
        // Step 3: Alpha mode conversion.
        //
        // For cross-depth conversions, we convert layout at the source depth
        // first, then change depth. This minimizes the number of channels
        // we need to depth-convert.

        let need_depth_change = from.channel_type() != to.channel_type();
        let need_layout_change = from.layout() != to.layout();
        // Alpha *mode* conversion applies only when both formats carry
        // alpha; adding/dropping the channel itself is a layout change.
        let need_alpha_change =
            from.alpha() != to.alpha() && from.alpha().is_some() && to.alpha().is_some();

        // Depth/TF steps are needed when depth changes, or when transfer
        // functions differ (at any depth — integer TF changes route through
        // an F32 linear intermediate, handled in `depth_steps`).
        let need_depth_or_tf = need_depth_change || from.transfer() != to.transfer();

        // If we need to change depth AND layout, plan the optimal order.
        if need_layout_change {
            // When going to fewer channels, convert layout first (less depth work).
            // When going to more channels, convert depth first (less layout work).
            //
            // Exception: Oklab layout steps require f32 data. When the source
            // is integer (U8/U16) and the layout change involves Oklab, we must
            // convert depth first regardless of channel count.
            let src_ch = from.layout().channels();
            let dst_ch = to.layout().channels();
            let involves_oklab =
                matches!(from.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA)
                    || matches!(to.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA);

            // Oklab conversion requires known primaries for the RGB→LMS matrix.
            if involves_oklab && from.primaries == ColorPrimaries::Unknown {
                return Err(whereat::at!(ConvertError::NoPath { from, to }));
            }

            let depth_first = need_depth_or_tf
                && (dst_ch > src_ch || (involves_oklab && from.channel_type() != ChannelType::F32));

            if depth_first {
                // Depth first, then layout.
                steps.extend(
                    depth_steps(
                        from.channel_type(),
                        to.channel_type(),
                        from.transfer(),
                        to.transfer(),
                    )
                    .map_err(|e| whereat::at!(e))?,
                );
                steps.extend(layout_steps(from.layout(), to.layout()));
            } else {
                // Layout first, then depth.
                steps.extend(layout_steps(from.layout(), to.layout()));
                if need_depth_or_tf {
                    steps.extend(
                        depth_steps(
                            from.channel_type(),
                            to.channel_type(),
                            from.transfer(),
                            to.transfer(),
                        )
                        .map_err(|e| whereat::at!(e))?,
                    );
                }
            }
        } else if need_depth_or_tf {
            // Layout already matches: only depth/TF conversion is needed.
            steps.extend(
                depth_steps(
                    from.channel_type(),
                    to.channel_type(),
                    from.transfer(),
                    to.transfer(),
                )
                .map_err(|e| whereat::at!(e))?,
            );
        }

        // Alpha mode conversion (if both have alpha and modes differ).
        if need_alpha_change {
            match (from.alpha(), to.alpha()) {
                (Some(AlphaMode::Straight), Some(AlphaMode::Premultiplied)) => {
                    steps.push(ConvertStep::StraightToPremul);
                }
                (Some(AlphaMode::Premultiplied), Some(AlphaMode::Straight)) => {
                    steps.push(ConvertStep::PremulToStraight);
                }
                _ => {}
            }
        }

        // Primaries conversion: if source and destination have different known
        // primaries, inject a gamut matrix in linear f32 space.
        let need_primaries = from.primaries != to.primaries
            && from.primaries != ColorPrimaries::Unknown
            && to.primaries != ColorPrimaries::Unknown;

        if need_primaries
            && let Some(matrix) = crate::gamut::conversion_matrix(from.primaries, to.primaries)
        {
            // Flatten the 3×3 matrix for storage in the step enum.
            let flat = [
                matrix[0][0],
                matrix[0][1],
                matrix[0][2],
                matrix[1][0],
                matrix[1][1],
                matrix[1][2],
                matrix[2][0],
                matrix[2][1],
                matrix[2][2],
            ];

            // The gamut matrix must be applied in linear f32 space.
            // Check if the existing steps already go through linear f32.
            let mut goes_through_linear = false;
            {
                // Simulate the plan so far: walk the intermediate descriptor
                // through each step and look for a linear-f32 point.
                let mut desc = from;
                for step in &steps {
                    desc = intermediate_desc(desc, step);
                    if desc.channel_type() == ChannelType::F32
                        && desc.transfer() == TransferFunction::Linear
                    {
                        goes_through_linear = true;
                    }
                }
            }

            if goes_through_linear {
                // Insert the gamut matrix right after the first step that
                // produces linear f32. All subsequent steps encode to the
                // target format.
                let mut insert_pos = 0;
                let mut desc = from;
                for (i, step) in steps.iter().enumerate() {
                    desc = intermediate_desc(desc, step);
                    if desc.channel_type() == ChannelType::F32
                        && desc.transfer() == TransferFunction::Linear
                    {
                        insert_pos = i + 1;
                        break;
                    }
                }
                // `desc` is now the linear-f32 intermediate at the break
                // point; its layout decides RGB vs RGBA matrix application.
                let gamut_step = if desc.layout().has_alpha() {
                    ConvertStep::GamutMatrixRgbaF32(flat)
                } else {
                    ConvertStep::GamutMatrixRgbF32(flat)
                };
                steps.insert(insert_pos, gamut_step);
            } else {
                // No existing linear f32 step — we must add linearize → gamut → delinearize.
                // Determine layout for the gamut step.
                let has_alpha = from.layout().has_alpha() || to.layout().has_alpha();
                // Use the layout at the current point in the plan.
                let mut desc = from;
                for step in &steps {
                    desc = intermediate_desc(desc, step);
                }
                let gamut_step = if desc.layout().has_alpha() || has_alpha {
                    ConvertStep::GamutMatrixRgbaF32(flat)
                } else {
                    ConvertStep::GamutMatrixRgbF32(flat)
                };

                // Insert linearize → gamut → encode-to-target-tf at the end,
                // before any alpha mode steps.
                let linearize = match desc.transfer() {
                    TransferFunction::Srgb => ConvertStep::SrgbF32ToLinearF32,
                    TransferFunction::Bt709 => ConvertStep::Bt709F32ToLinearF32,
                    TransferFunction::Pq => ConvertStep::PqF32ToLinearF32,
                    TransferFunction::Hlg => ConvertStep::HlgF32ToLinearF32,
                    TransferFunction::Gamma22 => ConvertStep::Gamma22F32ToLinearF32,
                    TransferFunction::Linear => ConvertStep::Identity,
                    _ => ConvertStep::SrgbF32ToLinearF32, // assume sRGB for Unknown
                };
                let to_target_tf = match to.transfer() {
                    TransferFunction::Srgb => ConvertStep::LinearF32ToSrgbF32,
                    TransferFunction::Bt709 => ConvertStep::LinearF32ToBt709F32,
                    TransferFunction::Pq => ConvertStep::LinearF32ToPqF32,
                    TransferFunction::Hlg => ConvertStep::LinearF32ToHlgF32,
                    TransferFunction::Gamma22 => ConvertStep::LinearF32ToGamma22F32,
                    TransferFunction::Linear => ConvertStep::Identity,
                    _ => ConvertStep::LinearF32ToSrgbF32, // assume sRGB for Unknown
                };

                // Need to be in f32 first. If current is integer, add naive conversion.
                let mut gamut_steps = Vec::new();
                // Direct fused-step emissions for common cases.
                if desc.channel_type() == ChannelType::U16
                    && desc.transfer() == TransferFunction::Srgb
                    && to.channel_type() == ChannelType::U16
                    && to.transfer() == TransferFunction::Srgb
                    && !desc.layout().has_alpha()
                    && !to.layout().has_alpha()
                {
                    // u16 sRGB → u16 sRGB RGB: single-step matlut.
                    gamut_steps.push(ConvertStep::FusedSrgbU16GamutRgb(flat));
                    steps.extend(gamut_steps);
                    // NOTE(review): `steps` cannot be empty right after the
                    // extend above (`gamut_steps` holds one step), so this
                    // guard is dead code — harmless. Same in the two fused
                    // early-return arms below.
                    if steps.is_empty() {
                        steps.push(ConvertStep::Identity);
                    }
                    fuse_matlut_patterns(&mut steps);
                    return Ok(Self { from, to, steps });
                }
                if desc.channel_type() == ChannelType::U8
                    && matches!(desc.transfer(), TransferFunction::Srgb)
                    && to.channel_type() == ChannelType::F32
                    && to.transfer() == TransferFunction::Linear
                    && !desc.layout().has_alpha()
                    && !to.layout().has_alpha()
                {
                    // u8 sRGB → linear f32 RGB: cross-depth matlut.
                    gamut_steps.push(ConvertStep::FusedSrgbU8ToLinearF32Rgb(flat));
                    steps.extend(gamut_steps);
                    if steps.is_empty() {
                        steps.push(ConvertStep::Identity);
                    }
                    fuse_matlut_patterns(&mut steps);
                    return Ok(Self { from, to, steps });
                }
                if desc.channel_type() == ChannelType::F32
                    && desc.transfer() == TransferFunction::Linear
                    && to.channel_type() == ChannelType::U8
                    && to.transfer() == TransferFunction::Srgb
                    && !desc.layout().has_alpha()
                    && !to.layout().has_alpha()
                {
                    // linear f32 → u8 sRGB RGB: cross-depth matlut.
                    gamut_steps.push(ConvertStep::FusedLinearF32ToSrgbU8Rgb(flat));
                    steps.extend(gamut_steps);
                    if steps.is_empty() {
                        steps.push(ConvertStep::Identity);
                    }
                    fuse_matlut_patterns(&mut steps);
                    return Ok(Self { from, to, steps });
                }
                if desc.channel_type() != ChannelType::F32 {
                    // Use the fused sRGB u8→linear f32 if applicable.
                    // (BT.709 and Unknown u8 sources are approximated with
                    // the sRGB LUT here.)
                    if desc.channel_type() == ChannelType::U8
                        && matches!(
                            desc.transfer(),
                            TransferFunction::Srgb
                                | TransferFunction::Bt709
                                | TransferFunction::Unknown
                        )
                    {
                        gamut_steps.push(ConvertStep::SrgbU8ToLinearF32);
                        // Already linear, skip separate linearize.
                        gamut_steps.push(gamut_step);
                        gamut_steps.push(ConvertStep::LinearF32ToSrgbU8);
                    } else if desc.channel_type() == ChannelType::U16
                        && desc.transfer() == TransferFunction::Pq
                    {
                        gamut_steps.push(ConvertStep::PqU16ToLinearF32);
                        gamut_steps.push(gamut_step);
                        gamut_steps.push(ConvertStep::LinearF32ToPqU16);
                    } else if desc.channel_type() == ChannelType::U16
                        && desc.transfer() == TransferFunction::Hlg
                    {
                        gamut_steps.push(ConvertStep::HlgU16ToLinearF32);
                        gamut_steps.push(gamut_step);
                        gamut_steps.push(ConvertStep::LinearF32ToHlgU16);
                    } else {
                        // Generic: naive to f32, linearize, gamut, delinearize, naive back
                        //
                        // NOTE(review): this arm hard-codes the u8 naive
                        // conversions, but `desc` can be U16 or F16 at this
                        // point (e.g. u16 with a Linear or Gamma22 transfer),
                        // and `NaiveU8ToF32` on u16 data looks wrong — TODO
                        // confirm whether non-u8 integer sources can actually
                        // reach this branch, and use U16ToF32/F16ToF32 if so.
                        gamut_steps.push(ConvertStep::NaiveU8ToF32);
                        if !matches!(linearize, ConvertStep::Identity) {
                            gamut_steps.push(linearize);
                        }
                        gamut_steps.push(gamut_step);
                        if !matches!(to_target_tf, ConvertStep::Identity) {
                            gamut_steps.push(to_target_tf);
                        }
                        gamut_steps.push(ConvertStep::NaiveF32ToU8);
                    }
                } else {
                    // Already f32, just linearize → gamut → encode
                    if !matches!(linearize, ConvertStep::Identity) {
                        gamut_steps.push(linearize);
                    }
                    gamut_steps.push(gamut_step);
                    if !matches!(to_target_tf, ConvertStep::Identity) {
                        gamut_steps.push(to_target_tf);
                    }
                }

                steps.extend(gamut_steps);
            }
        }

        if steps.is_empty() {
            // Transfer-only difference or alpha-mode-only: identity path.
            steps.push(ConvertStep::Identity);
        }

        // Peephole fusion: collapse common 3-step patterns into single fused
        // kernels that avoid scratch-buffer round-trips.
        fuse_matlut_patterns(&mut steps);

        Ok(Self { from, to, steps })
    }
594
595    /// Create a conversion plan with explicit policy enforcement.
596    ///
597    /// Validates that the planned conversion steps are allowed by the given
598    /// policies before creating the plan. Returns an error if a forbidden
599    /// operation would be required.
600    ///
601    /// # Panics
602    ///
603    /// Panics if either `from` or `to` uses [`ColorModel::Cmyk`](zenpixels::ColorModel::Cmyk).
604    /// CMYK requires a CMS with an ICC profile for conversion.
605    #[track_caller]
606    pub fn new_explicit(
607        from: PixelDescriptor,
608        to: PixelDescriptor,
609        options: &ConvertOptions,
610    ) -> Result<Self, At<ConvertError>> {
611        assert_not_cmyk(&from);
612        assert_not_cmyk(&to);
613        // Check alpha removal policy.
614        let drops_alpha = from.alpha().is_some() && to.alpha().is_none();
615        if drops_alpha && options.alpha_policy == AlphaPolicy::Forbid {
616            return Err(whereat::at!(ConvertError::AlphaRemovalForbidden));
617        }
618
619        // Check depth reduction policy. Compare by precision bits, not byte
620        // size — F16 and U16 are both 2 bytes but F16 carries only ~11 bits of
621        // precision vs U16's 16, so a U16→F16 hop IS a precision reduction and
622        // must be policy-gated.
623        let reduces_depth = crate::negotiate::channel_bits(from.channel_type())
624            > crate::negotiate::channel_bits(to.channel_type());
625        if reduces_depth && options.depth_policy == DepthPolicy::Forbid {
626            return Err(whereat::at!(ConvertError::DepthReductionForbidden));
627        }
628
629        // Check RGB→Gray requires luma coefficients.
630        let src_is_rgb = matches!(
631            from.layout(),
632            ChannelLayout::Rgb | ChannelLayout::Rgba | ChannelLayout::Bgra
633        );
634        let dst_is_gray = matches!(to.layout(), ChannelLayout::Gray | ChannelLayout::GrayAlpha);
635        if src_is_rgb && dst_is_gray && options.luma.is_none() {
636            return Err(whereat::at!(ConvertError::RgbToGray));
637        }
638
639        let mut plan = Self::new(from, to).at()?;
640
641        // Replace DropAlpha with MatteComposite when policy is CompositeOnto.
642        //
643        // The `matte_composite` kernel uses the straight-alpha over operator
644        // `fg*a + bg*(1-a)`, linearizing the sRGB matte and pixel RGB
645        // per-pixel using the source TF (kernel-side TF dispatch via the
646        // `MatteTf` trait). Alpha stays as-is (alpha is always linear,
647        // regardless of color-channel TF).
648        //
649        // One planner-side caveat handled here:
650        //
651        // **Premultiplied source.** If the source is premultiplied (our
652        // library's convention is encoded-space premul, per Canvas 2D),
653        // the straight kernel would multiply by `a` twice:
654        // `straight*a² + bg*(1-a)`. Fix: insert `PremulToStraight` before
655        // `MatteComposite`.
656        //
657        // We deliberately do NOT wrap with `SrgbF32ToLinearF32` /
658        // `LinearF32ToSrgbF32` to handle non-linear data: those steps
659        // linearize alpha too, which breaks the blend math.
660        if drops_alpha && let AlphaPolicy::CompositeOnto { r, g, b } = options.alpha_policy {
661            let src_is_premul = from.alpha() == Some(AlphaMode::Premultiplied);
662            let mut idx = 0;
663            while idx < plan.steps.len() {
664                if matches!(plan.steps[idx], ConvertStep::DropAlpha) {
665                    plan.steps[idx] = ConvertStep::MatteComposite { r, g, b };
666                    if src_is_premul {
667                        plan.steps.insert(idx, ConvertStep::PremulToStraight);
668                        idx += 1;
669                    }
670                }
671                idx += 1;
672            }
673        }
674
675        // When the caller opts out of clipping, swap pure-f32 sRGB transfer
676        // steps for their sign-preserving extended-range counterparts.
677        // Fused u8/u16 matlut steps are unaffected (integer I/O can't
678        // represent extended range anyway).
679        if !options.clip_out_of_gamut {
680            for step in &mut plan.steps {
681                match step {
682                    ConvertStep::SrgbF32ToLinearF32 => {
683                        *step = ConvertStep::SrgbF32ToLinearF32Extended;
684                    }
685                    ConvertStep::LinearF32ToSrgbF32 => {
686                        *step = ConvertStep::LinearF32ToSrgbF32Extended;
687                    }
688                    _ => {}
689                }
690            }
691        }
692
693        // Resolve luma coefficients on RgbToGray / RgbaToGray steps. The
694        // None case was rejected above (line 636), so unwrap is safe here.
695        // `layout_steps` constructs these variants with a Bt709 placeholder
696        // because it has no access to options; we replace with the user's
697        // explicit choice (or the permissive default of Bt709) here.
698        let user_luma = options.luma.unwrap_or(LumaCoefficients::Bt709);
699        for step in &mut plan.steps {
700            match step {
701                ConvertStep::RgbToGray { coefficients }
702                | ConvertStep::RgbaToGray { coefficients } => {
703                    *coefficients = user_luma;
704                }
705                _ => {}
706            }
707        }
708
709        Ok(plan)
710    }
711
712    /// Create a shell plan that records from/to but has no conversion steps.
713    ///
714    /// Used when an external CMS transform handles the conversion — the
715    /// plan exists only for `from()`/`to()` metadata; the actual row
716    /// work is driven by the external transform stored on `RowConverter`.
717    pub(crate) fn identity(from: PixelDescriptor, to: PixelDescriptor) -> Self {
718        Self {
719            from,
720            to,
721            steps: vec![ConvertStep::Identity],
722        }
723    }
724
725    /// Compose two plans into one: apply `self` then `other`.
726    ///
727    /// The composed plan executes both conversions in a single `convert_row`
728    /// call, using one intermediate buffer instead of two. Adjacent inverse
729    /// steps are cancelled (e.g., `SrgbU8ToLinearF32` + `LinearF32ToSrgbU8`
730    /// → identity).
731    ///
732    /// Returns `None` if `self.to` != `other.from` (incompatible plans).
733    pub fn compose(&self, other: &Self) -> Option<Self> {
734        if self.to != other.from {
735            return None;
736        }
737
738        let mut steps = self.steps.clone();
739
740        // Append other's steps, skipping its Identity if present.
741        for step in &other.steps {
742            if matches!(step, ConvertStep::Identity) {
743                continue;
744            }
745            steps.push(step.clone());
746        }
747
748        // Peephole: cancel adjacent inverse pairs.
749        let mut changed = true;
750        while changed {
751            changed = false;
752            let mut i = 0;
753            while i + 1 < steps.len() {
754                if are_inverse(&steps[i], &steps[i + 1]) {
755                    steps.remove(i + 1);
756                    steps.remove(i);
757                    changed = true;
758                    // Don't advance — check the new adjacent pair.
759                } else {
760                    i += 1;
761                }
762            }
763        }
764
765        // If everything cancelled, produce identity.
766        if steps.is_empty() {
767            steps.push(ConvertStep::Identity);
768        }
769
770        // Remove leading/trailing Identity if there are real steps.
771        if steps.len() > 1 {
772            steps.retain(|s| !matches!(s, ConvertStep::Identity));
773            if steps.is_empty() {
774                steps.push(ConvertStep::Identity);
775            }
776        }
777
778        Some(Self {
779            from: self.from,
780            to: other.to,
781            steps,
782        })
783    }
784
785    /// True if conversion is a no-op.
786    #[must_use]
787    pub fn is_identity(&self) -> bool {
788        self.steps.len() == 1 && matches!(self.steps[0], ConvertStep::Identity)
789    }
790
791    /// Maximum bytes-per-pixel across all intermediate formats in the plan.
792    ///
793    /// Used to pre-allocate scratch buffers for streaming conversion.
794    pub(crate) fn max_intermediate_bpp(&self) -> usize {
795        let mut desc = self.from;
796        let mut max_bpp = desc.bytes_per_pixel();
797        for step in &self.steps {
798            desc = intermediate_desc(desc, step);
799            max_bpp = max_bpp.max(desc.bytes_per_pixel());
800        }
801        max_bpp
802    }
803
    /// Source descriptor: the format rows fed to this plan must be in.
    pub fn from(&self) -> PixelDescriptor {
        self.from
    }
808
    /// Target descriptor: the format this plan produces.
    pub fn to(&self) -> PixelDescriptor {
        self.to
    }
813}
814
/// Determine the layout conversion step(s).
///
/// Some layout conversions require a short chain (e.g., BGRA -> RGB needs
/// swizzle + drop alpha; Oklab -> BGRA needs three steps). Returns up to
/// 3 steps. An empty result means either `from == to` (nothing to do) or
/// the pair hit the trailing catch-all (no supported path between these
/// layouts).
fn layout_steps(from: ChannelLayout, to: ChannelLayout) -> Vec<ConvertStep> {
    if from == to {
        return Vec::new();
    }
    match (from, to) {
        (ChannelLayout::Bgra, ChannelLayout::Rgba) | (ChannelLayout::Rgba, ChannelLayout::Bgra) => {
            vec![ConvertStep::SwizzleBgraRgba]
        }
        (ChannelLayout::Rgb, ChannelLayout::Rgba) => vec![ConvertStep::AddAlpha],
        (ChannelLayout::Rgb, ChannelLayout::Bgra) => {
            // Single fused SIMD pass (garb::bytes::rgb_to_bgra). For non-u8
            // channel types `apply_step_u8` falls back to AddAlpha+Swizzle.
            vec![ConvertStep::RgbToBgra]
        }
        (ChannelLayout::Rgba, ChannelLayout::Rgb) => vec![ConvertStep::DropAlpha],
        (ChannelLayout::Bgra, ChannelLayout::Rgb) => {
            // BGRA -> RGBA -> RGB: swizzle then drop alpha.
            vec![ConvertStep::SwizzleBgraRgba, ConvertStep::DropAlpha]
        }
        (ChannelLayout::Gray, ChannelLayout::Rgb) => vec![ConvertStep::GrayToRgb],
        (ChannelLayout::Gray, ChannelLayout::Rgba) => vec![ConvertStep::GrayToRgba],
        (ChannelLayout::Gray, ChannelLayout::Bgra) => {
            // Gray -> RGBA -> BGRA: expand then swizzle.
            vec![ConvertStep::GrayToRgba, ConvertStep::SwizzleBgraRgba]
        }
        // NOTE: Bt709 in the *ToGray arms below is a placeholder — this
        // function has no access to `ConvertOptions`, so the planner
        // overwrites `coefficients` with the user's luma choice afterwards.
        (ChannelLayout::Rgb, ChannelLayout::Gray) => vec![ConvertStep::RgbToGray {
            coefficients: LumaCoefficients::Bt709,
        }],
        (ChannelLayout::Rgba, ChannelLayout::Gray) => vec![ConvertStep::RgbaToGray {
            coefficients: LumaCoefficients::Bt709,
        }],
        (ChannelLayout::Bgra, ChannelLayout::Gray) => {
            // BGRA -> RGBA -> Gray: swizzle then to gray.
            vec![
                ConvertStep::SwizzleBgraRgba,
                ConvertStep::RgbaToGray {
                    coefficients: LumaCoefficients::Bt709,
                },
            ]
        }
        (ChannelLayout::GrayAlpha, ChannelLayout::Rgba) => vec![ConvertStep::GrayAlphaToRgba],
        (ChannelLayout::GrayAlpha, ChannelLayout::Bgra) => {
            // GrayAlpha -> RGBA -> BGRA: expand then swizzle.
            vec![ConvertStep::GrayAlphaToRgba, ConvertStep::SwizzleBgraRgba]
        }
        (ChannelLayout::GrayAlpha, ChannelLayout::Rgb) => vec![ConvertStep::GrayAlphaToRgb],
        (ChannelLayout::Gray, ChannelLayout::GrayAlpha) => vec![ConvertStep::GrayToGrayAlpha],
        (ChannelLayout::GrayAlpha, ChannelLayout::Gray) => vec![ConvertStep::GrayAlphaToGray],

        // Oklab ↔ RGB conversions (via linear RGB).
        (ChannelLayout::Rgb, ChannelLayout::Oklab) => vec![ConvertStep::LinearRgbToOklab],
        (ChannelLayout::Oklab, ChannelLayout::Rgb) => vec![ConvertStep::OklabToLinearRgb],
        (ChannelLayout::Rgba, ChannelLayout::OklabA) => vec![ConvertStep::LinearRgbaToOklaba],
        (ChannelLayout::OklabA, ChannelLayout::Rgba) => vec![ConvertStep::OklabaToLinearRgba],

        // Oklab ↔ RGB with alpha add/drop.
        (ChannelLayout::Rgb, ChannelLayout::OklabA) => {
            vec![ConvertStep::AddAlpha, ConvertStep::LinearRgbaToOklaba]
        }
        (ChannelLayout::OklabA, ChannelLayout::Rgb) => {
            vec![ConvertStep::OklabaToLinearRgba, ConvertStep::DropAlpha]
        }
        (ChannelLayout::Oklab, ChannelLayout::Rgba) => {
            vec![ConvertStep::OklabToLinearRgb, ConvertStep::AddAlpha]
        }
        (ChannelLayout::Rgba, ChannelLayout::Oklab) => {
            vec![ConvertStep::DropAlpha, ConvertStep::LinearRgbToOklab]
        }

        // Oklab ↔ BGRA (swizzle to/from RGBA, then Oklab).
        (ChannelLayout::Bgra, ChannelLayout::OklabA) => {
            vec![
                ConvertStep::SwizzleBgraRgba,
                ConvertStep::LinearRgbaToOklaba,
            ]
        }
        (ChannelLayout::OklabA, ChannelLayout::Bgra) => {
            vec![
                ConvertStep::OklabaToLinearRgba,
                ConvertStep::SwizzleBgraRgba,
            ]
        }
        (ChannelLayout::Bgra, ChannelLayout::Oklab) => {
            vec![
                ConvertStep::SwizzleBgraRgba,
                ConvertStep::DropAlpha,
                ConvertStep::LinearRgbToOklab,
            ]
        }
        (ChannelLayout::Oklab, ChannelLayout::Bgra) => {
            vec![
                ConvertStep::OklabToLinearRgb,
                ConvertStep::AddAlpha,
                ConvertStep::SwizzleBgraRgba,
            ]
        }

        // Gray ↔ Oklab (expand gray to RGB first).
        (ChannelLayout::Gray, ChannelLayout::Oklab) => {
            vec![ConvertStep::GrayToRgb, ConvertStep::LinearRgbToOklab]
        }
        (ChannelLayout::Oklab, ChannelLayout::Gray) => {
            vec![
                ConvertStep::OklabToLinearRgb,
                ConvertStep::RgbToGray {
                    coefficients: LumaCoefficients::Bt709,
                },
            ]
        }
        (ChannelLayout::Gray, ChannelLayout::OklabA) => {
            vec![ConvertStep::GrayToRgba, ConvertStep::LinearRgbaToOklaba]
        }
        (ChannelLayout::OklabA, ChannelLayout::Gray) => {
            vec![
                ConvertStep::OklabaToLinearRgba,
                ConvertStep::RgbaToGray {
                    coefficients: LumaCoefficients::Bt709,
                },
            ]
        }
        (ChannelLayout::GrayAlpha, ChannelLayout::OklabA) => {
            vec![
                ConvertStep::GrayAlphaToRgba,
                ConvertStep::LinearRgbaToOklaba,
            ]
        }
        (ChannelLayout::OklabA, ChannelLayout::GrayAlpha) => {
            // Drop alpha from OklabA→Oklab, convert to RGB, then to GrayAlpha.
            // Alpha is lost; this is inherently lossy.
            vec![
                ConvertStep::OklabaToLinearRgba,
                ConvertStep::RgbaToGray {
                    coefficients: LumaCoefficients::Bt709,
                },
                ConvertStep::GrayToGrayAlpha,
            ]
        }
        (ChannelLayout::GrayAlpha, ChannelLayout::Oklab) => {
            vec![ConvertStep::GrayAlphaToRgb, ConvertStep::LinearRgbToOklab]
        }
        (ChannelLayout::Oklab, ChannelLayout::GrayAlpha) => {
            vec![
                ConvertStep::OklabToLinearRgb,
                ConvertStep::RgbToGray {
                    coefficients: LumaCoefficients::Bt709,
                },
                ConvertStep::GrayToGrayAlpha,
            ]
        }

        // Oklab ↔ alpha variants.
        (ChannelLayout::Oklab, ChannelLayout::OklabA) => vec![ConvertStep::AddAlpha],
        (ChannelLayout::OklabA, ChannelLayout::Oklab) => vec![ConvertStep::DropAlpha],

        _ => Vec::new(), // Unsupported layout conversion.
    }
}
976
977/// F32→F32 linearize step for a transfer function, or `None` if the TF is
978/// already linear (or Unknown — caller decides how to handle Unknown).
979fn f32_linearize_step(tf: TransferFunction) -> Option<ConvertStep> {
980    match tf {
981        TransferFunction::Linear => None,
982        TransferFunction::Srgb => Some(ConvertStep::SrgbF32ToLinearF32),
983        TransferFunction::Bt709 => Some(ConvertStep::Bt709F32ToLinearF32),
984        TransferFunction::Pq => Some(ConvertStep::PqF32ToLinearF32),
985        TransferFunction::Hlg => Some(ConvertStep::HlgF32ToLinearF32),
986        TransferFunction::Gamma22 => Some(ConvertStep::Gamma22F32ToLinearF32),
987        TransferFunction::Unknown => None,
988        _ => None,
989    }
990}
991
992/// F32→F32 OETF step for a transfer function, or `None` if the TF is linear
993/// (or Unknown).
994fn f32_encode_step(tf: TransferFunction) -> Option<ConvertStep> {
995    match tf {
996        TransferFunction::Linear => None,
997        TransferFunction::Srgb => Some(ConvertStep::LinearF32ToSrgbF32),
998        TransferFunction::Bt709 => Some(ConvertStep::LinearF32ToBt709F32),
999        TransferFunction::Pq => Some(ConvertStep::LinearF32ToPqF32),
1000        TransferFunction::Hlg => Some(ConvertStep::LinearF32ToHlgF32),
1001        TransferFunction::Gamma22 => Some(ConvertStep::LinearF32ToGamma22F32),
1002        TransferFunction::Unknown => None,
1003        _ => None,
1004    }
1005}
1006
1007/// F32→F32 TF-change steps: linearize (if not already linear) then encode
1008/// (if target is not linear).
1009///
1010/// Returns empty when `from == to`, or when either side is `Unknown` — when
1011/// one side's TF is unknown we can't mechanically compute a correct
1012/// conversion, so we preserve bytes as-is. Addressing the Unknown ambiguity
1013/// via explicit opt-in API is tracked as issue #19 [C]/[D] (deprecate-and-add).
1014fn f32_tf_pair_steps(from: TransferFunction, to: TransferFunction) -> Vec<ConvertStep> {
1015    if from == to || from == TransferFunction::Unknown || to == TransferFunction::Unknown {
1016        return Vec::new();
1017    }
1018    let mut steps = Vec::with_capacity(2);
1019    if let Some(s) = f32_linearize_step(from) {
1020        steps.push(s);
1021    }
1022    if let Some(s) = f32_encode_step(to) {
1023        steps.push(s);
1024    }
1025    steps
1026}
1027
1028/// Depth conversion step into F32 for any non-F32 channel type (U8, U16, F16).
1029/// Panics for F32 (caller must check); CMYK is rejected upstream by `assert_not_cmyk`.
1030fn to_f32_step(ct: ChannelType) -> ConvertStep {
1031    match ct {
1032        ChannelType::U8 => ConvertStep::NaiveU8ToF32,
1033        ChannelType::U16 => ConvertStep::U16ToF32,
1034        ChannelType::F16 => ConvertStep::F16ToF32,
1035        _ => unreachable!("to_f32_step called with F32 or unsupported channel type"),
1036    }
1037}
1038
1039/// F32→depth step for any non-F32 channel type.
1040fn f32_to_depth_step(ct: ChannelType) -> ConvertStep {
1041    match ct {
1042        ChannelType::U8 => ConvertStep::NaiveF32ToU8,
1043        ChannelType::U16 => ConvertStep::F32ToU16,
1044        ChannelType::F16 => ConvertStep::F32ToF16,
1045        _ => unreachable!("f32_to_depth_step called with F32 or unsupported channel type"),
1046    }
1047}
1048
/// Determine the depth conversion step(s), considering transfer functions.
///
/// Returns one or more steps. Multi-step conversions route through an F32
/// linear intermediate (e.g. PQ U16 → sRGB U8 goes PQ U16 → Linear F32 →
/// sRGB U8), and same-depth integer TF changes route through an F32 linear
/// intermediate too: passing integer bytes through unchanged under a new
/// TF label produces wrong pixels.
fn depth_steps(
    from: ChannelType,
    to: ChannelType,
    from_tf: TransferFunction,
    to_tf: TransferFunction,
) -> Result<Vec<ConvertStep>, ConvertError> {
    // Same depth, same TF: nothing to do.
    if from == to && from_tf == to_tf {
        return Ok(Vec::new());
    }

    // Same depth, F32: apply EOTF/OETF in place.
    if from == to && from == ChannelType::F32 {
        return Ok(f32_tf_pair_steps(from_tf, to_tf));
    }

    // Same depth, non-F32 (U8/U16/F16): TF change requires re-encoding. Route
    // through F32 linear intermediate — passing bytes through labeled as a
    // different TF produces wrong pixels.
    //
    // Exception: if either TF is Unknown, we don't know the correct conversion.
    // Preserve bytes exactly (no F32 round-trip — that would introduce U8/U16
    // rounding error for no semantic benefit). Addressed properly by issue
    // #19 [C]/[D] via opt-in deprecate-and-add.
    if from == to && from != ChannelType::F32 {
        if from_tf == TransferFunction::Unknown || to_tf == TransferFunction::Unknown {
            return Ok(Vec::new());
        }
        let mut steps = Vec::with_capacity(4);
        steps.push(to_f32_step(from));
        steps.extend(f32_tf_pair_steps(from_tf, to_tf));
        steps.push(f32_to_depth_step(to));
        return Ok(steps);
    }

    // Cross-depth conversions. Fused kernels cover the common sRGB/PQ/HLG
    // hops; everything else composes a depth scale with F32 TF steps.
    match (from, to) {
        (ChannelType::U8, ChannelType::F32) => {
            // Fused sRGB EOTF kernel — sRGB only. BT.709 uses a different EOTF
            // (~17% linear-light error at mid-gray if we routed it through the
            // sRGB kernel) and must compose through the F32 BT.709 EOTF step.
            if from_tf == TransferFunction::Srgb && to_tf == TransferFunction::Linear {
                Ok(vec![ConvertStep::SrgbU8ToLinearF32])
            } else if from_tf == to_tf {
                Ok(vec![ConvertStep::NaiveU8ToF32])
            } else {
                // Cross-depth + cross-TF: linearize/encode after the U8→F32 scale.
                // Previously dropped the TF math and returned bytes labeled with
                // the target TF — silent wrong pixels for any TF pair other than
                // {Srgb,Bt709}→Linear.
                let mut steps = Vec::with_capacity(3);
                steps.push(ConvertStep::NaiveU8ToF32);
                steps.extend(f32_tf_pair_steps(from_tf, to_tf));
                Ok(steps)
            }
        }
        (ChannelType::F32, ChannelType::U8) => {
            // Fused sRGB OETF kernel — sRGB only (same reason as above).
            if from_tf == TransferFunction::Linear && to_tf == TransferFunction::Srgb {
                Ok(vec![ConvertStep::LinearF32ToSrgbU8])
            } else if from_tf == to_tf {
                Ok(vec![ConvertStep::NaiveF32ToU8])
            } else {
                // Linearize/encode in F32 first, then compress to U8.
                let mut steps = f32_tf_pair_steps(from_tf, to_tf);
                steps.push(ConvertStep::NaiveF32ToU8);
                Ok(steps)
            }
        }
        (ChannelType::U16, ChannelType::F32) => {
            // PQ/HLG U16 → Linear F32: apply EOTF during conversion.
            match (from_tf, to_tf) {
                (TransferFunction::Pq, TransferFunction::Linear) => {
                    Ok(vec![ConvertStep::PqU16ToLinearF32])
                }
                (TransferFunction::Hlg, TransferFunction::Linear) => {
                    Ok(vec![ConvertStep::HlgU16ToLinearF32])
                }
                (a, b) if a == b => Ok(vec![ConvertStep::U16ToF32]),
                _ => {
                    // Generic pair: scale up, then TF math in F32.
                    let mut steps = Vec::with_capacity(3);
                    steps.push(ConvertStep::U16ToF32);
                    steps.extend(f32_tf_pair_steps(from_tf, to_tf));
                    Ok(steps)
                }
            }
        }
        (ChannelType::F32, ChannelType::U16) => {
            // Linear F32 → PQ/HLG U16: apply OETF during conversion.
            match (from_tf, to_tf) {
                (TransferFunction::Linear, TransferFunction::Pq) => {
                    Ok(vec![ConvertStep::LinearF32ToPqU16])
                }
                (TransferFunction::Linear, TransferFunction::Hlg) => {
                    Ok(vec![ConvertStep::LinearF32ToHlgU16])
                }
                (a, b) if a == b => Ok(vec![ConvertStep::F32ToU16]),
                _ => {
                    // Generic pair: TF math in F32, then scale down.
                    let mut steps = f32_tf_pair_steps(from_tf, to_tf);
                    steps.push(ConvertStep::F32ToU16);
                    Ok(steps)
                }
            }
        }
        (ChannelType::U16, ChannelType::U8) => {
            // HDR U16 → SDR U8: go through linear F32 with proper EOTF → OETF.
            if from_tf == TransferFunction::Pq && to_tf == TransferFunction::Srgb {
                Ok(vec![
                    ConvertStep::PqU16ToLinearF32,
                    ConvertStep::LinearF32ToSrgbU8,
                ])
            } else if from_tf == TransferFunction::Hlg && to_tf == TransferFunction::Srgb {
                Ok(vec![
                    ConvertStep::HlgU16ToLinearF32,
                    ConvertStep::LinearF32ToSrgbU8,
                ])
            } else if from_tf == to_tf {
                Ok(vec![ConvertStep::U16ToU8])
            } else {
                let mut steps = Vec::with_capacity(4);
                steps.push(ConvertStep::U16ToF32);
                steps.extend(f32_tf_pair_steps(from_tf, to_tf));
                steps.push(ConvertStep::NaiveF32ToU8);
                Ok(steps)
            }
        }
        (ChannelType::U8, ChannelType::U16) => {
            if from_tf == to_tf {
                Ok(vec![ConvertStep::U8ToU16])
            } else {
                let mut steps = Vec::with_capacity(4);
                steps.push(ConvertStep::NaiveU8ToF32);
                steps.extend(f32_tf_pair_steps(from_tf, to_tf));
                steps.push(ConvertStep::F32ToU16);
                Ok(steps)
            }
        }
        // F16 paths route through F32. No fused TF kernels yet — these are
        // optimization targets for a future pass.
        (ChannelType::F16, ChannelType::F32) => {
            let mut steps = Vec::with_capacity(3);
            steps.push(ConvertStep::F16ToF32);
            // Guard is belt-and-braces: f32_tf_pair_steps already returns
            // empty when the TFs match.
            if from_tf != to_tf {
                steps.extend(f32_tf_pair_steps(from_tf, to_tf));
            }
            Ok(steps)
        }
        (ChannelType::F32, ChannelType::F16) => {
            let mut steps = Vec::with_capacity(3);
            if from_tf != to_tf {
                steps.extend(f32_tf_pair_steps(from_tf, to_tf));
            }
            steps.push(ConvertStep::F32ToF16);
            Ok(steps)
        }
        (ChannelType::F16, ChannelType::U8) => {
            let mut steps = Vec::with_capacity(4);
            steps.push(ConvertStep::F16ToF32);
            if from_tf == TransferFunction::Linear && to_tf == TransferFunction::Srgb {
                // Fused OETF + narrow once in F32.
                steps.push(ConvertStep::LinearF32ToSrgbU8);
            } else if from_tf == to_tf {
                steps.push(ConvertStep::NaiveF32ToU8);
            } else {
                steps.extend(f32_tf_pair_steps(from_tf, to_tf));
                steps.push(ConvertStep::NaiveF32ToU8);
            }
            Ok(steps)
        }
        (ChannelType::U8, ChannelType::F16) => {
            let mut steps = Vec::with_capacity(4);
            if from_tf == TransferFunction::Srgb && to_tf == TransferFunction::Linear {
                // Fused widen + EOTF in one kernel.
                steps.push(ConvertStep::SrgbU8ToLinearF32);
            } else if from_tf == to_tf {
                steps.push(ConvertStep::NaiveU8ToF32);
            } else {
                steps.push(ConvertStep::NaiveU8ToF32);
                steps.extend(f32_tf_pair_steps(from_tf, to_tf));
            }
            steps.push(ConvertStep::F32ToF16);
            Ok(steps)
        }
        (ChannelType::F16, ChannelType::U16) => {
            let mut steps = Vec::with_capacity(4);
            steps.push(ConvertStep::F16ToF32);
            if from_tf == TransferFunction::Linear && to_tf == TransferFunction::Pq {
                steps.push(ConvertStep::LinearF32ToPqU16);
            } else if from_tf == TransferFunction::Linear && to_tf == TransferFunction::Hlg {
                steps.push(ConvertStep::LinearF32ToHlgU16);
            } else if from_tf == to_tf {
                steps.push(ConvertStep::F32ToU16);
            } else {
                steps.extend(f32_tf_pair_steps(from_tf, to_tf));
                steps.push(ConvertStep::F32ToU16);
            }
            Ok(steps)
        }
        (ChannelType::U16, ChannelType::F16) => {
            let mut steps = Vec::with_capacity(4);
            if from_tf == TransferFunction::Pq && to_tf == TransferFunction::Linear {
                steps.push(ConvertStep::PqU16ToLinearF32);
            } else if from_tf == TransferFunction::Hlg && to_tf == TransferFunction::Linear {
                steps.push(ConvertStep::HlgU16ToLinearF32);
            } else if from_tf == to_tf {
                steps.push(ConvertStep::U16ToF32);
            } else {
                steps.push(ConvertStep::U16ToF32);
                steps.extend(f32_tf_pair_steps(from_tf, to_tf));
            }
            steps.push(ConvertStep::F32ToF16);
            Ok(steps)
        }
        _ => Err(ConvertError::NoPath {
            // Rgb here is a placeholder layout for the error payload; only
            // the channel types and TFs identify the failing depth hop.
            from: PixelDescriptor::new(from, ChannelLayout::Rgb, None, from_tf),
            to: PixelDescriptor::new(to, ChannelLayout::Rgb, None, to_tf),
        }),
    }
}
1271
1272// ---------------------------------------------------------------------------
1273// Row conversion kernels
1274// ---------------------------------------------------------------------------
1275
1276/// Pre-allocated scratch buffer for multi-step row conversions.
1277///
1278/// Eliminates per-row heap allocation by reusing two ping-pong halves
1279/// of a single buffer across calls. Create once per [`ConvertPlan`],
1280/// then pass to `convert_row_buffered` for each row.
1281pub(crate) struct ConvertScratch {
1282    /// Single allocation split into two halves via `split_at_mut`.
1283    /// Stored as `Vec<u32>` to guarantee 4-byte alignment, which lets
1284    /// garb and bytemuck use fast aligned paths instead of unaligned fallbacks.
1285    buf: Vec<u32>,
1286}
1287
1288impl ConvertScratch {
1289    /// Create empty scratch (buffer grows on first use).
1290    pub(crate) fn new() -> Self {
1291        Self { buf: Vec::new() }
1292    }
1293
1294    /// Ensure the buffer is large enough for two halves of the max
1295    /// intermediate format at the given width.
1296    fn ensure_capacity(&mut self, plan: &ConvertPlan, width: u32) {
1297        let half_bytes = (width as usize) * plan.max_intermediate_bpp();
1298        let total_u32 = (half_bytes * 2).div_ceil(4);
1299        if self.buf.len() < total_u32 {
1300            self.buf.resize(total_u32, 0);
1301        }
1302    }
1303}
1304
// Manual impl: reports only the capacity rather than the raw buffer
// contents (the buf field holds transient scratch bytes).
impl core::fmt::Debug for ConvertScratch {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("ConvertScratch")
            .field("capacity", &self.buf.capacity())
            .finish()
    }
}
1312
1313/// Convert one row of `width` pixels using a pre-computed plan.
1314///
1315/// `src` and `dst` must be sized for `width` pixels in their respective formats.
1316/// For multi-step plans, an internal scratch buffer is allocated per call.
1317/// Prefer [`RowConverter`](crate::RowConverter) in hot loops (reuses scratch buffers).
1318pub fn convert_row(plan: &ConvertPlan, src: &[u8], dst: &mut [u8], width: u32) {
1319    if plan.is_identity() {
1320        let len = min(src.len(), dst.len());
1321        dst[..len].copy_from_slice(&src[..len]);
1322        return;
1323    }
1324
1325    if plan.steps.len() == 1 {
1326        apply_step_u8(&plan.steps[0], src, dst, width, plan.from, plan.to);
1327        return;
1328    }
1329
1330    // Allocating fallback for one-off calls.
1331    let mut scratch = ConvertScratch::new();
1332    convert_row_buffered(plan, src, dst, width, &mut scratch);
1333}
1334
/// Convert one row of `width` pixels, reusing pre-allocated scratch buffers.
///
/// For multi-step plans this avoids per-row heap allocation by ping-ponging
/// between two halves of a scratch buffer. Single-step plans bypass scratch.
pub(crate) fn convert_row_buffered(
    plan: &ConvertPlan,
    src: &[u8],
    dst: &mut [u8],
    width: u32,
    scratch: &mut ConvertScratch,
) {
    // Identity: plain copy, clamped to the shorter slice.
    if plan.is_identity() {
        let len = min(src.len(), dst.len());
        dst[..len].copy_from_slice(&src[..len]);
        return;
    }

    // One step: read src, write dst directly — no intermediates needed.
    if plan.steps.len() == 1 {
        apply_step_u8(&plan.steps[0], src, dst, width, plan.from, plan.to);
        return;
    }

    scratch.ensure_capacity(plan, width);

    // View the u32-backed scratch as bytes (keeps the 4-byte alignment).
    let buf_bytes: &mut [u8] = bytemuck::cast_slice_mut(&mut scratch.buf);
    let half = buf_bytes.len() / 2;
    let (buf_a, buf_b) = buf_bytes.split_at_mut(half);

    let num_steps = plan.steps.len();
    let mut current_desc = plan.from;

    for (i, step) in plan.steps.iter().enumerate() {
        let is_last = i == num_steps - 1;
        // The final step writes `plan.to` directly; `intermediate_desc` is
        // consulted only for interior hops.
        let next_desc = if is_last {
            plan.to
        } else {
            intermediate_desc(current_desc, step)
        };

        // Byte lengths of this step's output and input at `width` pixels.
        let next_len = (width as usize) * next_desc.bytes_per_pixel();
        let curr_len = (width as usize) * current_desc.bytes_per_pixel();

        // Ping-pong: even steps read src/buf_b and write buf_a;
        // odd steps read buf_a and write buf_b. Each branch only
        // borrows each half in one mode, satisfying the borrow checker.
        if i % 2 == 0 {
            let input = if i == 0 { src } else { &buf_b[..curr_len] };
            if is_last {
                apply_step_u8(step, input, dst, width, current_desc, next_desc);
            } else {
                apply_step_u8(
                    step,
                    input,
                    &mut buf_a[..next_len],
                    width,
                    current_desc,
                    next_desc,
                );
            }
        } else {
            let input = &buf_a[..curr_len];
            if is_last {
                apply_step_u8(step, input, dst, width, current_desc, next_desc);
            } else {
                apply_step_u8(
                    step,
                    input,
                    &mut buf_b[..next_len],
                    width,
                    current_desc,
                    next_desc,
                );
            }
        }

        current_desc = next_desc;
    }
}
1413
1414/// Check if two steps are inverses that cancel each other.
1415/// Collapse `[SrgbU8ToLinearF32, GamutMatrix*F32(m), LinearF32ToSrgbU8]`
1416/// into a single fused matlut step. Mutates in place.
1417fn fuse_matlut_patterns(steps: &mut Vec<ConvertStep>) {
1418    let mut i = 0;
1419    while i + 2 < steps.len() {
1420        let rewrite = match (&steps[i], &steps[i + 1], &steps[i + 2]) {
1421            (
1422                ConvertStep::SrgbU8ToLinearF32,
1423                ConvertStep::GamutMatrixRgbF32(m),
1424                ConvertStep::LinearF32ToSrgbU8,
1425            ) => Some(ConvertStep::FusedSrgbU8GamutRgb(*m)),
1426            (
1427                ConvertStep::SrgbU8ToLinearF32,
1428                ConvertStep::GamutMatrixRgbaF32(m),
1429                ConvertStep::LinearF32ToSrgbU8,
1430            ) => Some(ConvertStep::FusedSrgbU8GamutRgba(*m)),
1431            _ => None,
1432        };
1433        if let Some(fused) = rewrite {
1434            steps[i] = fused;
1435            steps.drain(i + 1..i + 3);
1436            continue;
1437        }
1438        i += 1;
1439    }
1440}
1441
1442fn are_inverse(a: &ConvertStep, b: &ConvertStep) -> bool {
1443    matches!(
1444        (a, b),
1445        // Self-inverse
1446        (ConvertStep::SwizzleBgraRgba, ConvertStep::SwizzleBgraRgba)
1447        // Layout inverses (lossless for opaque data)
1448        | (ConvertStep::AddAlpha, ConvertStep::DropAlpha)
1449        // Transfer function f32↔f32 (exact inverses in float)
1450        | (ConvertStep::SrgbF32ToLinearF32, ConvertStep::LinearF32ToSrgbF32)
1451        | (ConvertStep::LinearF32ToSrgbF32, ConvertStep::SrgbF32ToLinearF32)
1452        | (ConvertStep::PqF32ToLinearF32, ConvertStep::LinearF32ToPqF32)
1453        | (ConvertStep::LinearF32ToPqF32, ConvertStep::PqF32ToLinearF32)
1454        | (ConvertStep::HlgF32ToLinearF32, ConvertStep::LinearF32ToHlgF32)
1455        | (ConvertStep::LinearF32ToHlgF32, ConvertStep::HlgF32ToLinearF32)
1456        | (ConvertStep::Bt709F32ToLinearF32, ConvertStep::LinearF32ToBt709F32)
1457        | (ConvertStep::LinearF32ToBt709F32, ConvertStep::Bt709F32ToLinearF32)
1458        | (ConvertStep::Gamma22F32ToLinearF32, ConvertStep::LinearF32ToGamma22F32)
1459        | (ConvertStep::LinearF32ToGamma22F32, ConvertStep::Gamma22F32ToLinearF32)
1460        // Alpha mode (exact inverses in float)
1461        | (ConvertStep::StraightToPremul, ConvertStep::PremulToStraight)
1462        | (ConvertStep::PremulToStraight, ConvertStep::StraightToPremul)
1463        // Color model (exact inverses in float)
1464        | (ConvertStep::LinearRgbToOklab, ConvertStep::OklabToLinearRgb)
1465        | (ConvertStep::OklabToLinearRgb, ConvertStep::LinearRgbToOklab)
1466        | (ConvertStep::LinearRgbaToOklaba, ConvertStep::OklabaToLinearRgba)
1467        | (ConvertStep::OklabaToLinearRgba, ConvertStep::LinearRgbaToOklaba)
1468        // Cross-depth pairs (near-lossless for same depth class)
1469        | (ConvertStep::NaiveU8ToF32, ConvertStep::NaiveF32ToU8)
1470        | (ConvertStep::NaiveF32ToU8, ConvertStep::NaiveU8ToF32)
1471        | (ConvertStep::U8ToU16, ConvertStep::U16ToU8)
1472        | (ConvertStep::U16ToU8, ConvertStep::U8ToU16)
1473        | (ConvertStep::U16ToF32, ConvertStep::F32ToU16)
1474        | (ConvertStep::F32ToU16, ConvertStep::U16ToF32)
1475        | (ConvertStep::F16ToF32, ConvertStep::F32ToF16)
1476        | (ConvertStep::F32ToF16, ConvertStep::F16ToF32)
1477        // Cross-depth with transfer (near-lossless roundtrip)
1478        | (ConvertStep::SrgbU8ToLinearF32, ConvertStep::LinearF32ToSrgbU8)
1479        | (ConvertStep::LinearF32ToSrgbU8, ConvertStep::SrgbU8ToLinearF32)
1480        | (ConvertStep::PqU16ToLinearF32, ConvertStep::LinearF32ToPqU16)
1481        | (ConvertStep::LinearF32ToPqU16, ConvertStep::PqU16ToLinearF32)
1482        | (ConvertStep::HlgU16ToLinearF32, ConvertStep::LinearF32ToHlgU16)
1483        | (ConvertStep::LinearF32ToHlgU16, ConvertStep::HlgU16ToLinearF32)
1484        // Extended-range sRGB f32 pairs
1485        | (ConvertStep::SrgbF32ToLinearF32Extended, ConvertStep::LinearF32ToSrgbF32Extended)
1486        | (ConvertStep::LinearF32ToSrgbF32Extended, ConvertStep::SrgbF32ToLinearF32Extended)
1487    )
1488}
1489
1490/// Compute the descriptor after applying one step.
1491fn intermediate_desc(current: PixelDescriptor, step: &ConvertStep) -> PixelDescriptor {
1492    match step {
1493        ConvertStep::Identity => current,
1494        ConvertStep::SwizzleBgraRgba => {
1495            let new_layout = match current.layout() {
1496                ChannelLayout::Bgra => ChannelLayout::Rgba,
1497                ChannelLayout::Rgba => ChannelLayout::Bgra,
1498                other => other,
1499            };
1500            PixelDescriptor::new(
1501                current.channel_type(),
1502                new_layout,
1503                current.alpha(),
1504                current.transfer(),
1505            )
1506        }
1507        ConvertStep::AddAlpha => PixelDescriptor::new(
1508            current.channel_type(),
1509            ChannelLayout::Rgba,
1510            Some(AlphaMode::Straight),
1511            current.transfer(),
1512        ),
1513        ConvertStep::RgbToBgra => PixelDescriptor::new(
1514            current.channel_type(),
1515            ChannelLayout::Bgra,
1516            Some(AlphaMode::Straight),
1517            current.transfer(),
1518        ),
1519        ConvertStep::DropAlpha | ConvertStep::MatteComposite { .. } => PixelDescriptor::new(
1520            current.channel_type(),
1521            ChannelLayout::Rgb,
1522            None,
1523            current.transfer(),
1524        ),
1525        ConvertStep::GrayToRgb => PixelDescriptor::new(
1526            current.channel_type(),
1527            ChannelLayout::Rgb,
1528            None,
1529            current.transfer(),
1530        ),
1531        ConvertStep::GrayToRgba => PixelDescriptor::new(
1532            current.channel_type(),
1533            ChannelLayout::Rgba,
1534            Some(AlphaMode::Straight),
1535            current.transfer(),
1536        ),
1537        ConvertStep::RgbToGray { .. } | ConvertStep::RgbaToGray { .. } => PixelDescriptor::new(
1538            current.channel_type(),
1539            ChannelLayout::Gray,
1540            None,
1541            current.transfer(),
1542        ),
1543        ConvertStep::GrayAlphaToRgba => PixelDescriptor::new(
1544            current.channel_type(),
1545            ChannelLayout::Rgba,
1546            current.alpha(),
1547            current.transfer(),
1548        ),
1549        ConvertStep::GrayAlphaToRgb => PixelDescriptor::new(
1550            current.channel_type(),
1551            ChannelLayout::Rgb,
1552            None,
1553            current.transfer(),
1554        ),
1555        ConvertStep::GrayToGrayAlpha => PixelDescriptor::new(
1556            current.channel_type(),
1557            ChannelLayout::GrayAlpha,
1558            Some(AlphaMode::Straight),
1559            current.transfer(),
1560        ),
1561        ConvertStep::GrayAlphaToGray => PixelDescriptor::new(
1562            current.channel_type(),
1563            ChannelLayout::Gray,
1564            None,
1565            current.transfer(),
1566        ),
1567        ConvertStep::SrgbU8ToLinearF32
1568        | ConvertStep::NaiveU8ToF32
1569        | ConvertStep::U16ToF32
1570        | ConvertStep::PqU16ToLinearF32
1571        | ConvertStep::HlgU16ToLinearF32
1572        | ConvertStep::PqF32ToLinearF32
1573        | ConvertStep::HlgF32ToLinearF32
1574        | ConvertStep::SrgbF32ToLinearF32
1575        | ConvertStep::SrgbF32ToLinearF32Extended
1576        | ConvertStep::Bt709F32ToLinearF32
1577        | ConvertStep::Gamma22F32ToLinearF32 => PixelDescriptor::new(
1578            ChannelType::F32,
1579            current.layout(),
1580            current.alpha(),
1581            TransferFunction::Linear,
1582        ),
1583        ConvertStep::LinearF32ToSrgbU8 | ConvertStep::NaiveF32ToU8 | ConvertStep::U16ToU8 => {
1584            PixelDescriptor::new(
1585                ChannelType::U8,
1586                current.layout(),
1587                current.alpha(),
1588                TransferFunction::Srgb,
1589            )
1590        }
1591        ConvertStep::U8ToU16 => PixelDescriptor::new(
1592            ChannelType::U16,
1593            current.layout(),
1594            current.alpha(),
1595            current.transfer(),
1596        ),
1597        ConvertStep::F32ToU16 | ConvertStep::LinearF32ToPqU16 | ConvertStep::LinearF32ToHlgU16 => {
1598            let tf = match step {
1599                ConvertStep::LinearF32ToPqU16 => TransferFunction::Pq,
1600                ConvertStep::LinearF32ToHlgU16 => TransferFunction::Hlg,
1601                _ => current.transfer(),
1602            };
1603            PixelDescriptor::new(ChannelType::U16, current.layout(), current.alpha(), tf)
1604        }
1605        ConvertStep::LinearF32ToPqF32 => PixelDescriptor::new(
1606            ChannelType::F32,
1607            current.layout(),
1608            current.alpha(),
1609            TransferFunction::Pq,
1610        ),
1611        ConvertStep::LinearF32ToHlgF32 => PixelDescriptor::new(
1612            ChannelType::F32,
1613            current.layout(),
1614            current.alpha(),
1615            TransferFunction::Hlg,
1616        ),
1617        ConvertStep::LinearF32ToSrgbF32 | ConvertStep::LinearF32ToSrgbF32Extended => {
1618            PixelDescriptor::new(
1619                ChannelType::F32,
1620                current.layout(),
1621                current.alpha(),
1622                TransferFunction::Srgb,
1623            )
1624        }
1625        ConvertStep::LinearF32ToBt709F32 => PixelDescriptor::new(
1626            ChannelType::F32,
1627            current.layout(),
1628            current.alpha(),
1629            TransferFunction::Bt709,
1630        ),
1631        ConvertStep::LinearF32ToGamma22F32 => PixelDescriptor::new(
1632            ChannelType::F32,
1633            current.layout(),
1634            current.alpha(),
1635            TransferFunction::Gamma22,
1636        ),
1637        ConvertStep::StraightToPremul => PixelDescriptor::new(
1638            current.channel_type(),
1639            current.layout(),
1640            Some(AlphaMode::Premultiplied),
1641            current.transfer(),
1642        ),
1643        ConvertStep::PremulToStraight => PixelDescriptor::new(
1644            current.channel_type(),
1645            current.layout(),
1646            Some(AlphaMode::Straight),
1647            current.transfer(),
1648        ),
1649        ConvertStep::LinearRgbToOklab => PixelDescriptor::new(
1650            ChannelType::F32,
1651            ChannelLayout::Oklab,
1652            None,
1653            TransferFunction::Unknown,
1654        )
1655        .with_primaries(current.primaries),
1656        ConvertStep::OklabToLinearRgb => PixelDescriptor::new(
1657            ChannelType::F32,
1658            ChannelLayout::Rgb,
1659            None,
1660            TransferFunction::Linear,
1661        )
1662        .with_primaries(current.primaries),
1663        ConvertStep::LinearRgbaToOklaba => PixelDescriptor::new(
1664            ChannelType::F32,
1665            ChannelLayout::OklabA,
1666            Some(AlphaMode::Straight),
1667            TransferFunction::Unknown,
1668        )
1669        .with_primaries(current.primaries),
1670        ConvertStep::OklabaToLinearRgba => PixelDescriptor::new(
1671            ChannelType::F32,
1672            ChannelLayout::Rgba,
1673            current.alpha(),
1674            TransferFunction::Linear,
1675        )
1676        .with_primaries(current.primaries),
1677
1678        // Gamut matrix: same depth/layout/TF, but primaries change.
1679        // The actual target primaries are embedded in the matrix, not tracked
1680        // here — we mark them as Unknown since the step doesn't carry that info.
1681        // The final plan.to descriptor has the correct primaries.
1682        ConvertStep::GamutMatrixRgbF32(_) => PixelDescriptor::new(
1683            ChannelType::F32,
1684            current.layout(),
1685            current.alpha(),
1686            TransferFunction::Linear,
1687        ),
1688        ConvertStep::GamutMatrixRgbaF32(_) => PixelDescriptor::new(
1689            ChannelType::F32,
1690            current.layout(),
1691            current.alpha(),
1692            TransferFunction::Linear,
1693        ),
1694        // Fused steps: u8 sRGB in, u8 sRGB out (same layout, same alpha).
1695        ConvertStep::FusedSrgbU8GamutRgb(_) | ConvertStep::FusedSrgbU8GamutRgba(_) => {
1696            PixelDescriptor::new(
1697                ChannelType::U8,
1698                current.layout(),
1699                current.alpha(),
1700                TransferFunction::Srgb,
1701            )
1702        }
1703        ConvertStep::FusedSrgbU16GamutRgb(_) => PixelDescriptor::new(
1704            ChannelType::U16,
1705            current.layout(),
1706            current.alpha(),
1707            TransferFunction::Srgb,
1708        ),
1709        ConvertStep::FusedSrgbU8ToLinearF32Rgb(_) => PixelDescriptor::new(
1710            ChannelType::F32,
1711            current.layout(),
1712            current.alpha(),
1713            TransferFunction::Linear,
1714        ),
1715        ConvertStep::FusedLinearF32ToSrgbU8Rgb(_) => PixelDescriptor::new(
1716            ChannelType::U8,
1717            current.layout(),
1718            current.alpha(),
1719            TransferFunction::Srgb,
1720        ),
1721        // F16↔F32 depth-only steps. No TF implication: same TF on both sides.
1722        ConvertStep::F16ToF32 => PixelDescriptor::new(
1723            ChannelType::F32,
1724            current.layout(),
1725            current.alpha(),
1726            current.transfer(),
1727        ),
1728        ConvertStep::F32ToF16 => PixelDescriptor::new(
1729            ChannelType::F16,
1730            current.layout(),
1731            current.alpha(),
1732            current.transfer(),
1733        ),
1734    }
1735}
1736
1737#[path = "convert_kernels.rs"]
1738mod convert_kernels;
1739use convert_kernels::apply_step_u8;
1740pub(crate) use convert_kernels::{hlg_eotf, hlg_oetf, pq_eotf, pq_oetf};