// zenpixels_convert/convert.rs

1//! Row-level pixel conversion kernels.
2//!
3//! Each kernel converts one row of `width` pixels from a source format to
4//! a destination format. Individual step kernels are pure functions with
5//! no allocation. Multi-step plans use [`ConvertScratch`] ping-pong
6//! buffers to avoid per-row heap allocation in streaming loops.
7
8use alloc::vec;
9use alloc::vec::Vec;
10use core::cmp::min;
11
12use crate::policy::{AlphaPolicy, ConvertOptions, DepthPolicy};
13use crate::{
14    AlphaMode, ChannelLayout, ChannelType, ColorPrimaries, ConvertError, PixelDescriptor,
15    TransferFunction,
16};
17use whereat::{At, ResultAtExt};
18
19/// Pre-computed conversion plan.
20///
21/// Stores the chain of steps needed to convert from one format to another.
22/// Created once, applied to every row.
23#[derive(Clone, Debug)]
24pub struct ConvertPlan {
25    pub(crate) from: PixelDescriptor,
26    pub(crate) to: PixelDescriptor,
27    pub(crate) steps: Vec<ConvertStep>,
28}
29
/// A single conversion step.
///
/// Deliberately not `Copy`: peephole rewrites must use `.clone()` or index
/// assignment with pattern matching instead of `*step` dereferences
/// (presumably so the enum can later grow variants that own shared state).
///
/// NOTE(review): earlier docs linked an `ExternalTransform` variant carrying
/// an `Arc`. No such variant is declared here, so the dangling intra-doc link
/// was dropped. Every current variant is plain data, which is why `Debug` is
/// derived; the derived output is the variant name for unit variants,
/// `Name { field: .. }` for struct variants and `Name(..)` for tuple variants.
#[derive(Clone, Debug)]
pub(crate) enum ConvertStep {
    /// No-op (identity).
    Identity,
    /// BGRA → RGBA byte swizzle (or vice versa).
    SwizzleBgraRgba,
    /// Fused RGB → BGRA: byte swap + add opaque alpha in a single SIMD pass.
    /// Equivalent to `[AddAlpha, SwizzleBgraRgba]` but writes the destination
    /// once instead of twice.
    RgbToBgra,
    /// Add alpha channel (3ch → 4ch), filling with opaque.
    AddAlpha,
    /// Drop alpha channel (4ch → 3ch).
    DropAlpha,
    /// Composite onto solid matte color, then drop alpha (4ch → 3ch).
    ///
    /// Blends in linear light: src and matte are converted from sRGB to linear,
    /// alpha-blended, then converted back. For f32 data, pixel values are
    /// assumed already linear; only the sRGB u8 matte is linearized.
    MatteComposite { r: u8, g: u8, b: u8 },
    /// Gray → RGB (replicate gray to all 3 channels).
    GrayToRgb,
    /// Gray → RGBA (replicate + opaque alpha).
    GrayToRgba,
    /// RGB → Gray (BT.709 luma).
    RgbToGray,
    /// RGBA → Gray (BT.709 luma, drop alpha).
    RgbaToGray,
    /// GrayAlpha → RGBA (replicate gray, keep alpha).
    GrayAlphaToRgba,
    /// GrayAlpha → RGB (replicate gray, drop alpha).
    GrayAlphaToRgb,
    /// Gray → GrayAlpha (add opaque alpha).
    GrayToGrayAlpha,
    /// GrayAlpha → Gray (drop alpha).
    GrayAlphaToGray,
    /// sRGB u8 → linear f32 (per channel, EOTF).
    SrgbU8ToLinearF32,
    /// Linear f32 → sRGB u8 (per channel, OETF).
    LinearF32ToSrgbU8,
    /// Naive u8 → f32 (v / 255.0, no gamma).
    NaiveU8ToF32,
    /// Naive f32 → u8 (clamp * 255 + 0.5, no gamma).
    NaiveF32ToU8,
    /// u16 → u8 ((v * 255 + 32768) >> 16).
    U16ToU8,
    /// u8 → u16 (v * 257).
    U8ToU16,
    /// u16 → f32 (v / 65535.0).
    U16ToF32,
    /// f32 → u16 (clamp * 65535 + 0.5).
    F32ToU16,
    /// PQ (SMPTE ST 2084) u16 → linear f32 (EOTF).
    PqU16ToLinearF32,
    /// Linear f32 → PQ u16 (inverse EOTF / OETF).
    LinearF32ToPqU16,
    /// PQ f32 [0,1] → linear f32 (EOTF, no depth change).
    PqF32ToLinearF32,
    /// Linear f32 → PQ f32 [0,1] (OETF, no depth change).
    LinearF32ToPqF32,
    /// HLG (ARIB STD-B67) u16 → linear f32 (EOTF).
    HlgU16ToLinearF32,
    /// Linear f32 → HLG u16 (OETF).
    LinearF32ToHlgU16,
    /// HLG f32 [0,1] → linear f32 (EOTF, no depth change).
    HlgF32ToLinearF32,
    /// Linear f32 → HLG f32 [0,1] (OETF, no depth change).
    LinearF32ToHlgF32,
    /// sRGB f32 [0,1] → linear f32 (EOTF, no depth change). Clamps input.
    SrgbF32ToLinearF32,
    /// Linear f32 → sRGB f32 [0,1] (OETF, no depth change). Clamps output.
    LinearF32ToSrgbF32,
    /// sRGB f32 → linear f32 (EOTF, sign-preserving extended range).
    /// Emitted when `ConvertOptions::clip_out_of_gamut == false`.
    SrgbF32ToLinearF32Extended,
    /// Linear f32 → sRGB f32 (OETF, sign-preserving extended range).
    LinearF32ToSrgbF32Extended,
    /// BT.709 f32 [0,1] → linear f32 (EOTF, no depth change).
    Bt709F32ToLinearF32,
    /// Linear f32 → BT.709 f32 [0,1] (OETF, no depth change).
    LinearF32ToBt709F32,
    /// Straight → Premultiplied alpha.
    StraightToPremul,
    /// Premultiplied → Straight alpha.
    PremulToStraight,
    /// Linear RGB f32 → Oklab f32 (3-channel color model change).
    LinearRgbToOklab,
    /// Oklab f32 → Linear RGB f32 (3-channel color model change).
    OklabToLinearRgb,
    /// Linear RGBA f32 → Oklaba f32 (4-channel, alpha preserved).
    LinearRgbaToOklaba,
    /// Oklaba f32 → Linear RGBA f32 (4-channel, alpha preserved).
    OklabaToLinearRgba,
    /// Apply a 3×3 gamut matrix to linear RGB f32 (3 channels per pixel).
    ///
    /// Used for color primaries conversion (e.g., BT.709 ↔ Display P3 ↔ BT.2020).
    /// Data must be in linear light. The matrix is row-major `[[f32; 3]; 3]`
    /// flattened to `[f32; 9]`.
    GamutMatrixRgbF32([f32; 9]),
    /// Apply a 3×3 gamut matrix to linear RGBA f32 (4 channels, alpha passthrough).
    GamutMatrixRgbaF32([f32; 9]),
    /// Fused u8-sRGB RGB primaries conversion: LUT linearize → SIMD matrix →
    /// SIMD f32→i32 → LUT encode, in one pass. Replaces the 3-step sequence
    /// `[SrgbU8ToLinearF32, GamutMatrixRgbF32(m), LinearF32ToSrgbU8]`.
    FusedSrgbU8GamutRgb([f32; 9]),
    /// Fused u8-sRGB RGBA primaries conversion (alpha passthrough).
    FusedSrgbU8GamutRgba([f32; 9]),
    /// Fused u16-sRGB RGB primaries conversion via 65K-entry LUTs.
    FusedSrgbU16GamutRgb([f32; 9]),
    /// Fused u8-sRGB → linear-f32 RGB primaries conversion (cross-depth).
    /// Output preserves extended range (no clamp).
    FusedSrgbU8ToLinearF32Rgb([f32; 9]),
    /// Fused linear-f32 → u8-sRGB RGB primaries conversion (cross-depth).
    /// Always clamps since u8 can't represent out-of-gamut values.
    FusedLinearF32ToSrgbU8Rgb([f32; 9]),
}
222
223/// Assert that a descriptor is not CMYK.
224///
225/// CMYK is device-dependent and cannot be converted by zenpixels-convert.
226/// Use a CMS (e.g., moxcms) with an ICC profile for CMYK↔RGB conversion.
227fn assert_not_cmyk(desc: &PixelDescriptor) {
228    assert!(
229        desc.color_model() != crate::ColorModel::Cmyk,
230        "CMYK pixel data cannot be processed by zenpixels-convert. \
231         Use a CMS (e.g., moxcms) with an ICC profile for CMYK↔RGB conversion."
232    );
233}
234
235impl ConvertPlan {
236    /// Create a conversion plan from `from` to `to`.
237    ///
238    /// Returns `Err` if no conversion path exists.
239    ///
240    /// # Panics
241    ///
242    /// Panics if either `from` or `to` uses [`ColorModel::Cmyk`](zenpixels::ColorModel::Cmyk).
243    /// CMYK requires a CMS with an ICC profile for conversion.
244    #[track_caller]
245    pub fn new(from: PixelDescriptor, to: PixelDescriptor) -> Result<Self, At<ConvertError>> {
246        assert_not_cmyk(&from);
247        assert_not_cmyk(&to);
248        if from == to {
249            return Ok(Self {
250                from,
251                to,
252                steps: vec![ConvertStep::Identity],
253            });
254        }
255
256        let mut steps = Vec::with_capacity(3);
257
258        // Step 1: Layout conversion (within same depth class).
259        // Step 2: Depth conversion.
260        // Step 3: Alpha mode conversion.
261        //
262        // For cross-depth conversions, we convert layout at the source depth
263        // first, then change depth. This minimizes the number of channels
264        // we need to depth-convert.
265
266        let need_depth_change = from.channel_type() != to.channel_type();
267        let need_layout_change = from.layout() != to.layout();
268        let need_alpha_change =
269            from.alpha() != to.alpha() && from.alpha().is_some() && to.alpha().is_some();
270
271        // Depth/TF steps are needed when depth changes, or when both are F32
272        // and transfer functions differ.
273        let need_depth_or_tf = need_depth_change
274            || (from.channel_type() == ChannelType::F32 && from.transfer() != to.transfer());
275
276        // If we need to change depth AND layout, plan the optimal order.
277        if need_layout_change {
278            // When going to fewer channels, convert layout first (less depth work).
279            // When going to more channels, convert depth first (less layout work).
280            //
281            // Exception: Oklab layout steps require f32 data. When the source
282            // is integer (U8/U16) and the layout change involves Oklab, we must
283            // convert depth first regardless of channel count.
284            let src_ch = from.layout().channels();
285            let dst_ch = to.layout().channels();
286            let involves_oklab =
287                matches!(from.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA)
288                    || matches!(to.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA);
289
290            // Oklab conversion requires known primaries for the RGB→LMS matrix.
291            if involves_oklab && from.primaries == ColorPrimaries::Unknown {
292                return Err(whereat::at!(ConvertError::NoPath { from, to }));
293            }
294
295            let depth_first = need_depth_or_tf
296                && (dst_ch > src_ch || (involves_oklab && from.channel_type() != ChannelType::F32));
297
298            if depth_first {
299                // Depth first, then layout.
300                steps.extend(
301                    depth_steps(
302                        from.channel_type(),
303                        to.channel_type(),
304                        from.transfer(),
305                        to.transfer(),
306                    )
307                    .map_err(|e| whereat::at!(e))?,
308                );
309                steps.extend(layout_steps(from.layout(), to.layout()));
310            } else {
311                // Layout first, then depth.
312                steps.extend(layout_steps(from.layout(), to.layout()));
313                if need_depth_or_tf {
314                    steps.extend(
315                        depth_steps(
316                            from.channel_type(),
317                            to.channel_type(),
318                            from.transfer(),
319                            to.transfer(),
320                        )
321                        .map_err(|e| whereat::at!(e))?,
322                    );
323                }
324            }
325        } else if need_depth_or_tf {
326            steps.extend(
327                depth_steps(
328                    from.channel_type(),
329                    to.channel_type(),
330                    from.transfer(),
331                    to.transfer(),
332                )
333                .map_err(|e| whereat::at!(e))?,
334            );
335        }
336
337        // Alpha mode conversion (if both have alpha and modes differ).
338        if need_alpha_change {
339            match (from.alpha(), to.alpha()) {
340                (Some(AlphaMode::Straight), Some(AlphaMode::Premultiplied)) => {
341                    steps.push(ConvertStep::StraightToPremul);
342                }
343                (Some(AlphaMode::Premultiplied), Some(AlphaMode::Straight)) => {
344                    steps.push(ConvertStep::PremulToStraight);
345                }
346                _ => {}
347            }
348        }
349
350        // Primaries conversion: if source and destination have different known
351        // primaries, inject a gamut matrix in linear f32 space.
352        let need_primaries = from.primaries != to.primaries
353            && from.primaries != ColorPrimaries::Unknown
354            && to.primaries != ColorPrimaries::Unknown;
355
356        if need_primaries
357            && let Some(matrix) = crate::gamut::conversion_matrix(from.primaries, to.primaries)
358        {
359            // Flatten the 3×3 matrix for storage in the step enum.
360            let flat = [
361                matrix[0][0],
362                matrix[0][1],
363                matrix[0][2],
364                matrix[1][0],
365                matrix[1][1],
366                matrix[1][2],
367                matrix[2][0],
368                matrix[2][1],
369                matrix[2][2],
370            ];
371
372            // The gamut matrix must be applied in linear f32 space.
373            // Check if the existing steps already go through linear f32.
374            let mut goes_through_linear = false;
375            {
376                let mut desc = from;
377                for step in &steps {
378                    desc = intermediate_desc(desc, step);
379                    if desc.channel_type() == ChannelType::F32
380                        && desc.transfer() == TransferFunction::Linear
381                    {
382                        goes_through_linear = true;
383                    }
384                }
385            }
386
387            if goes_through_linear {
388                // Insert the gamut matrix right after the first step that
389                // produces linear f32. All subsequent steps encode to the
390                // target format.
391                let mut insert_pos = 0;
392                let mut desc = from;
393                for (i, step) in steps.iter().enumerate() {
394                    desc = intermediate_desc(desc, step);
395                    if desc.channel_type() == ChannelType::F32
396                        && desc.transfer() == TransferFunction::Linear
397                    {
398                        insert_pos = i + 1;
399                        break;
400                    }
401                }
402                let gamut_step = if desc.layout().has_alpha() {
403                    ConvertStep::GamutMatrixRgbaF32(flat)
404                } else {
405                    ConvertStep::GamutMatrixRgbF32(flat)
406                };
407                steps.insert(insert_pos, gamut_step);
408            } else {
409                // No existing linear f32 step — we must add linearize → gamut → delinearize.
410                // Determine layout for the gamut step.
411                let has_alpha = from.layout().has_alpha() || to.layout().has_alpha();
412                // Use the layout at the current point in the plan.
413                let mut desc = from;
414                for step in &steps {
415                    desc = intermediate_desc(desc, step);
416                }
417                let gamut_step = if desc.layout().has_alpha() || has_alpha {
418                    ConvertStep::GamutMatrixRgbaF32(flat)
419                } else {
420                    ConvertStep::GamutMatrixRgbF32(flat)
421                };
422
423                // Insert linearize → gamut → encode-to-target-tf at the end,
424                // before any alpha mode steps.
425                let linearize = match desc.transfer() {
426                    TransferFunction::Srgb => ConvertStep::SrgbF32ToLinearF32,
427                    TransferFunction::Bt709 => ConvertStep::Bt709F32ToLinearF32,
428                    TransferFunction::Pq => ConvertStep::PqF32ToLinearF32,
429                    TransferFunction::Hlg => ConvertStep::HlgF32ToLinearF32,
430                    TransferFunction::Linear => ConvertStep::Identity,
431                    _ => ConvertStep::SrgbF32ToLinearF32, // assume sRGB for Unknown
432                };
433                let to_target_tf = match to.transfer() {
434                    TransferFunction::Srgb => ConvertStep::LinearF32ToSrgbF32,
435                    TransferFunction::Bt709 => ConvertStep::LinearF32ToBt709F32,
436                    TransferFunction::Pq => ConvertStep::LinearF32ToPqF32,
437                    TransferFunction::Hlg => ConvertStep::LinearF32ToHlgF32,
438                    TransferFunction::Linear => ConvertStep::Identity,
439                    _ => ConvertStep::LinearF32ToSrgbF32, // assume sRGB for Unknown
440                };
441
442                // Need to be in f32 first. If current is integer, add naive conversion.
443                let mut gamut_steps = Vec::new();
444                // Direct fused-step emissions for common cases.
445                if desc.channel_type() == ChannelType::U16
446                    && desc.transfer() == TransferFunction::Srgb
447                    && to.channel_type() == ChannelType::U16
448                    && to.transfer() == TransferFunction::Srgb
449                    && !desc.layout().has_alpha()
450                    && !to.layout().has_alpha()
451                {
452                    // u16 sRGB → u16 sRGB RGB: single-step matlut.
453                    gamut_steps.push(ConvertStep::FusedSrgbU16GamutRgb(flat));
454                    steps.extend(gamut_steps);
455                    if steps.is_empty() {
456                        steps.push(ConvertStep::Identity);
457                    }
458                    fuse_matlut_patterns(&mut steps);
459                    return Ok(Self { from, to, steps });
460                }
461                if desc.channel_type() == ChannelType::U8
462                    && matches!(desc.transfer(), TransferFunction::Srgb)
463                    && to.channel_type() == ChannelType::F32
464                    && to.transfer() == TransferFunction::Linear
465                    && !desc.layout().has_alpha()
466                    && !to.layout().has_alpha()
467                {
468                    // u8 sRGB → linear f32 RGB: cross-depth matlut.
469                    gamut_steps.push(ConvertStep::FusedSrgbU8ToLinearF32Rgb(flat));
470                    steps.extend(gamut_steps);
471                    if steps.is_empty() {
472                        steps.push(ConvertStep::Identity);
473                    }
474                    fuse_matlut_patterns(&mut steps);
475                    return Ok(Self { from, to, steps });
476                }
477                if desc.channel_type() == ChannelType::F32
478                    && desc.transfer() == TransferFunction::Linear
479                    && to.channel_type() == ChannelType::U8
480                    && to.transfer() == TransferFunction::Srgb
481                    && !desc.layout().has_alpha()
482                    && !to.layout().has_alpha()
483                {
484                    // linear f32 → u8 sRGB RGB: cross-depth matlut.
485                    gamut_steps.push(ConvertStep::FusedLinearF32ToSrgbU8Rgb(flat));
486                    steps.extend(gamut_steps);
487                    if steps.is_empty() {
488                        steps.push(ConvertStep::Identity);
489                    }
490                    fuse_matlut_patterns(&mut steps);
491                    return Ok(Self { from, to, steps });
492                }
493                if desc.channel_type() != ChannelType::F32 {
494                    // Use the fused sRGB u8→linear f32 if applicable.
495                    if desc.channel_type() == ChannelType::U8
496                        && matches!(
497                            desc.transfer(),
498                            TransferFunction::Srgb
499                                | TransferFunction::Bt709
500                                | TransferFunction::Unknown
501                        )
502                    {
503                        gamut_steps.push(ConvertStep::SrgbU8ToLinearF32);
504                        // Already linear, skip separate linearize.
505                        gamut_steps.push(gamut_step);
506                        gamut_steps.push(ConvertStep::LinearF32ToSrgbU8);
507                    } else if desc.channel_type() == ChannelType::U16
508                        && desc.transfer() == TransferFunction::Pq
509                    {
510                        gamut_steps.push(ConvertStep::PqU16ToLinearF32);
511                        gamut_steps.push(gamut_step);
512                        gamut_steps.push(ConvertStep::LinearF32ToPqU16);
513                    } else if desc.channel_type() == ChannelType::U16
514                        && desc.transfer() == TransferFunction::Hlg
515                    {
516                        gamut_steps.push(ConvertStep::HlgU16ToLinearF32);
517                        gamut_steps.push(gamut_step);
518                        gamut_steps.push(ConvertStep::LinearF32ToHlgU16);
519                    } else {
520                        // Generic: naive to f32, linearize, gamut, delinearize, naive back
521                        gamut_steps.push(ConvertStep::NaiveU8ToF32);
522                        if !matches!(linearize, ConvertStep::Identity) {
523                            gamut_steps.push(linearize);
524                        }
525                        gamut_steps.push(gamut_step);
526                        if !matches!(to_target_tf, ConvertStep::Identity) {
527                            gamut_steps.push(to_target_tf);
528                        }
529                        gamut_steps.push(ConvertStep::NaiveF32ToU8);
530                    }
531                } else {
532                    // Already f32, just linearize → gamut → encode
533                    if !matches!(linearize, ConvertStep::Identity) {
534                        gamut_steps.push(linearize);
535                    }
536                    gamut_steps.push(gamut_step);
537                    if !matches!(to_target_tf, ConvertStep::Identity) {
538                        gamut_steps.push(to_target_tf);
539                    }
540                }
541
542                steps.extend(gamut_steps);
543            }
544        }
545
546        if steps.is_empty() {
547            // Transfer-only difference or alpha-mode-only: identity path.
548            steps.push(ConvertStep::Identity);
549        }
550
551        // Peephole fusion: collapse common 3-step patterns into single fused
552        // kernels that avoid scratch-buffer round-trips.
553        fuse_matlut_patterns(&mut steps);
554
555        Ok(Self { from, to, steps })
556    }
557
558    /// Create a conversion plan with explicit policy enforcement.
559    ///
560    /// Validates that the planned conversion steps are allowed by the given
561    /// policies before creating the plan. Returns an error if a forbidden
562    /// operation would be required.
563    ///
564    /// # Panics
565    ///
566    /// Panics if either `from` or `to` uses [`ColorModel::Cmyk`](zenpixels::ColorModel::Cmyk).
567    /// CMYK requires a CMS with an ICC profile for conversion.
568    #[track_caller]
569    pub fn new_explicit(
570        from: PixelDescriptor,
571        to: PixelDescriptor,
572        options: &ConvertOptions,
573    ) -> Result<Self, At<ConvertError>> {
574        assert_not_cmyk(&from);
575        assert_not_cmyk(&to);
576        // Check alpha removal policy.
577        let drops_alpha = from.alpha().is_some() && to.alpha().is_none();
578        if drops_alpha && options.alpha_policy == AlphaPolicy::Forbid {
579            return Err(whereat::at!(ConvertError::AlphaRemovalForbidden));
580        }
581
582        // Check depth reduction policy.
583        let reduces_depth = from.channel_type().byte_size() > to.channel_type().byte_size();
584        if reduces_depth && options.depth_policy == DepthPolicy::Forbid {
585            return Err(whereat::at!(ConvertError::DepthReductionForbidden));
586        }
587
588        // Check RGB→Gray requires luma coefficients.
589        let src_is_rgb = matches!(
590            from.layout(),
591            ChannelLayout::Rgb | ChannelLayout::Rgba | ChannelLayout::Bgra
592        );
593        let dst_is_gray = matches!(to.layout(), ChannelLayout::Gray | ChannelLayout::GrayAlpha);
594        if src_is_rgb && dst_is_gray && options.luma.is_none() {
595            return Err(whereat::at!(ConvertError::RgbToGray));
596        }
597
598        let mut plan = Self::new(from, to).at()?;
599
600        // Replace DropAlpha with MatteComposite when policy is CompositeOnto.
601        if drops_alpha && let AlphaPolicy::CompositeOnto { r, g, b } = options.alpha_policy {
602            for step in &mut plan.steps {
603                if matches!(step, ConvertStep::DropAlpha) {
604                    *step = ConvertStep::MatteComposite { r, g, b };
605                }
606            }
607        }
608
609        // When the caller opts out of clipping, swap pure-f32 sRGB transfer
610        // steps for their sign-preserving extended-range counterparts.
611        // Fused u8/u16 matlut steps are unaffected (integer I/O can't
612        // represent extended range anyway).
613        if !options.clip_out_of_gamut {
614            for step in &mut plan.steps {
615                match step {
616                    ConvertStep::SrgbF32ToLinearF32 => {
617                        *step = ConvertStep::SrgbF32ToLinearF32Extended;
618                    }
619                    ConvertStep::LinearF32ToSrgbF32 => {
620                        *step = ConvertStep::LinearF32ToSrgbF32Extended;
621                    }
622                    _ => {}
623                }
624            }
625        }
626
627        Ok(plan)
628    }
629
630    /// Create a shell plan that records from/to but has no conversion steps.
631    ///
632    /// Used when an external CMS transform handles the conversion — the
633    /// plan exists only for `from()`/`to()` metadata; the actual row
634    /// work is driven by the external transform stored on `RowConverter`.
635    pub(crate) fn identity(from: PixelDescriptor, to: PixelDescriptor) -> Self {
636        Self {
637            from,
638            to,
639            steps: vec![ConvertStep::Identity],
640        }
641    }
642
643    /// Compose two plans into one: apply `self` then `other`.
644    ///
645    /// The composed plan executes both conversions in a single `convert_row`
646    /// call, using one intermediate buffer instead of two. Adjacent inverse
647    /// steps are cancelled (e.g., `SrgbU8ToLinearF32` + `LinearF32ToSrgbU8`
648    /// → identity).
649    ///
650    /// Returns `None` if `self.to` != `other.from` (incompatible plans).
651    pub fn compose(&self, other: &Self) -> Option<Self> {
652        if self.to != other.from {
653            return None;
654        }
655
656        let mut steps = self.steps.clone();
657
658        // Append other's steps, skipping its Identity if present.
659        for step in &other.steps {
660            if matches!(step, ConvertStep::Identity) {
661                continue;
662            }
663            steps.push(step.clone());
664        }
665
666        // Peephole: cancel adjacent inverse pairs.
667        let mut changed = true;
668        while changed {
669            changed = false;
670            let mut i = 0;
671            while i + 1 < steps.len() {
672                if are_inverse(&steps[i], &steps[i + 1]) {
673                    steps.remove(i + 1);
674                    steps.remove(i);
675                    changed = true;
676                    // Don't advance — check the new adjacent pair.
677                } else {
678                    i += 1;
679                }
680            }
681        }
682
683        // If everything cancelled, produce identity.
684        if steps.is_empty() {
685            steps.push(ConvertStep::Identity);
686        }
687
688        // Remove leading/trailing Identity if there are real steps.
689        if steps.len() > 1 {
690            steps.retain(|s| !matches!(s, ConvertStep::Identity));
691            if steps.is_empty() {
692                steps.push(ConvertStep::Identity);
693            }
694        }
695
696        Some(Self {
697            from: self.from,
698            to: other.to,
699            steps,
700        })
701    }
702
703    /// True if conversion is a no-op.
704    #[must_use]
705    pub fn is_identity(&self) -> bool {
706        self.steps.len() == 1 && matches!(self.steps[0], ConvertStep::Identity)
707    }
708
709    /// Maximum bytes-per-pixel across all intermediate formats in the plan.
710    ///
711    /// Used to pre-allocate scratch buffers for streaming conversion.
712    pub(crate) fn max_intermediate_bpp(&self) -> usize {
713        let mut desc = self.from;
714        let mut max_bpp = desc.bytes_per_pixel();
715        for step in &self.steps {
716            desc = intermediate_desc(desc, step);
717            max_bpp = max_bpp.max(desc.bytes_per_pixel());
718        }
719        max_bpp
720    }
721
    /// Source descriptor: the pixel format this plan converts *from*.
    ///
    /// Returned by value (a copy of the descriptor stored in the plan).
    pub fn from(&self) -> PixelDescriptor {
        self.from
    }
726
    /// Target descriptor: the pixel format this plan converts *to*.
    ///
    /// Returned by value (a copy of the descriptor stored in the plan).
    pub fn to(&self) -> PixelDescriptor {
        self.to
    }
731}
732
733/// Determine the layout conversion step(s).
734///
735/// Some layout conversions require two steps (e.g., BGRA -> RGB needs
736/// swizzle + drop alpha). Returns up to 2 steps.
737fn layout_steps(from: ChannelLayout, to: ChannelLayout) -> Vec<ConvertStep> {
738    if from == to {
739        return Vec::new();
740    }
741    match (from, to) {
742        (ChannelLayout::Bgra, ChannelLayout::Rgba) | (ChannelLayout::Rgba, ChannelLayout::Bgra) => {
743            vec![ConvertStep::SwizzleBgraRgba]
744        }
745        (ChannelLayout::Rgb, ChannelLayout::Rgba) => vec![ConvertStep::AddAlpha],
746        (ChannelLayout::Rgb, ChannelLayout::Bgra) => {
747            // Single fused SIMD pass (garb::bytes::rgb_to_bgra). For non-u8
748            // channel types `apply_step_u8` falls back to AddAlpha+Swizzle.
749            vec![ConvertStep::RgbToBgra]
750        }
751        (ChannelLayout::Rgba, ChannelLayout::Rgb) => vec![ConvertStep::DropAlpha],
752        (ChannelLayout::Bgra, ChannelLayout::Rgb) => {
753            // BGRA -> RGBA -> RGB: swizzle then drop alpha.
754            vec![ConvertStep::SwizzleBgraRgba, ConvertStep::DropAlpha]
755        }
756        (ChannelLayout::Gray, ChannelLayout::Rgb) => vec![ConvertStep::GrayToRgb],
757        (ChannelLayout::Gray, ChannelLayout::Rgba) => vec![ConvertStep::GrayToRgba],
758        (ChannelLayout::Gray, ChannelLayout::Bgra) => {
759            // Gray -> RGBA -> BGRA: expand then swizzle.
760            vec![ConvertStep::GrayToRgba, ConvertStep::SwizzleBgraRgba]
761        }
762        (ChannelLayout::Rgb, ChannelLayout::Gray) => vec![ConvertStep::RgbToGray],
763        (ChannelLayout::Rgba, ChannelLayout::Gray) => vec![ConvertStep::RgbaToGray],
764        (ChannelLayout::Bgra, ChannelLayout::Gray) => {
765            // BGRA -> RGBA -> Gray: swizzle then to gray.
766            vec![ConvertStep::SwizzleBgraRgba, ConvertStep::RgbaToGray]
767        }
768        (ChannelLayout::GrayAlpha, ChannelLayout::Rgba) => vec![ConvertStep::GrayAlphaToRgba],
769        (ChannelLayout::GrayAlpha, ChannelLayout::Bgra) => {
770            // GrayAlpha -> RGBA -> BGRA: expand then swizzle.
771            vec![ConvertStep::GrayAlphaToRgba, ConvertStep::SwizzleBgraRgba]
772        }
773        (ChannelLayout::GrayAlpha, ChannelLayout::Rgb) => vec![ConvertStep::GrayAlphaToRgb],
774        (ChannelLayout::Gray, ChannelLayout::GrayAlpha) => vec![ConvertStep::GrayToGrayAlpha],
775        (ChannelLayout::GrayAlpha, ChannelLayout::Gray) => vec![ConvertStep::GrayAlphaToGray],
776
777        // Oklab ↔ RGB conversions (via linear RGB).
778        (ChannelLayout::Rgb, ChannelLayout::Oklab) => vec![ConvertStep::LinearRgbToOklab],
779        (ChannelLayout::Oklab, ChannelLayout::Rgb) => vec![ConvertStep::OklabToLinearRgb],
780        (ChannelLayout::Rgba, ChannelLayout::OklabA) => vec![ConvertStep::LinearRgbaToOklaba],
781        (ChannelLayout::OklabA, ChannelLayout::Rgba) => vec![ConvertStep::OklabaToLinearRgba],
782
783        // Oklab ↔ RGB with alpha add/drop.
784        (ChannelLayout::Rgb, ChannelLayout::OklabA) => {
785            vec![ConvertStep::AddAlpha, ConvertStep::LinearRgbaToOklaba]
786        }
787        (ChannelLayout::OklabA, ChannelLayout::Rgb) => {
788            vec![ConvertStep::OklabaToLinearRgba, ConvertStep::DropAlpha]
789        }
790        (ChannelLayout::Oklab, ChannelLayout::Rgba) => {
791            vec![ConvertStep::OklabToLinearRgb, ConvertStep::AddAlpha]
792        }
793        (ChannelLayout::Rgba, ChannelLayout::Oklab) => {
794            vec![ConvertStep::DropAlpha, ConvertStep::LinearRgbToOklab]
795        }
796
797        // Oklab ↔ BGRA (swizzle to/from RGBA, then Oklab).
798        (ChannelLayout::Bgra, ChannelLayout::OklabA) => {
799            vec![
800                ConvertStep::SwizzleBgraRgba,
801                ConvertStep::LinearRgbaToOklaba,
802            ]
803        }
804        (ChannelLayout::OklabA, ChannelLayout::Bgra) => {
805            vec![
806                ConvertStep::OklabaToLinearRgba,
807                ConvertStep::SwizzleBgraRgba,
808            ]
809        }
810        (ChannelLayout::Bgra, ChannelLayout::Oklab) => {
811            vec![
812                ConvertStep::SwizzleBgraRgba,
813                ConvertStep::DropAlpha,
814                ConvertStep::LinearRgbToOklab,
815            ]
816        }
817        (ChannelLayout::Oklab, ChannelLayout::Bgra) => {
818            vec![
819                ConvertStep::OklabToLinearRgb,
820                ConvertStep::AddAlpha,
821                ConvertStep::SwizzleBgraRgba,
822            ]
823        }
824
825        // Gray ↔ Oklab (expand gray to RGB first).
826        (ChannelLayout::Gray, ChannelLayout::Oklab) => {
827            vec![ConvertStep::GrayToRgb, ConvertStep::LinearRgbToOklab]
828        }
829        (ChannelLayout::Oklab, ChannelLayout::Gray) => {
830            vec![ConvertStep::OklabToLinearRgb, ConvertStep::RgbToGray]
831        }
832        (ChannelLayout::Gray, ChannelLayout::OklabA) => {
833            vec![ConvertStep::GrayToRgba, ConvertStep::LinearRgbaToOklaba]
834        }
835        (ChannelLayout::OklabA, ChannelLayout::Gray) => {
836            vec![ConvertStep::OklabaToLinearRgba, ConvertStep::RgbaToGray]
837        }
838        (ChannelLayout::GrayAlpha, ChannelLayout::OklabA) => {
839            vec![
840                ConvertStep::GrayAlphaToRgba,
841                ConvertStep::LinearRgbaToOklaba,
842            ]
843        }
844        (ChannelLayout::OklabA, ChannelLayout::GrayAlpha) => {
845            // Drop alpha from OklabA→Oklab, convert to RGB, then to GrayAlpha.
846            // Alpha is lost; this is inherently lossy.
847            vec![
848                ConvertStep::OklabaToLinearRgba,
849                ConvertStep::RgbaToGray,
850                ConvertStep::GrayToGrayAlpha,
851            ]
852        }
853        (ChannelLayout::GrayAlpha, ChannelLayout::Oklab) => {
854            vec![ConvertStep::GrayAlphaToRgb, ConvertStep::LinearRgbToOklab]
855        }
856        (ChannelLayout::Oklab, ChannelLayout::GrayAlpha) => {
857            vec![
858                ConvertStep::OklabToLinearRgb,
859                ConvertStep::RgbToGray,
860                ConvertStep::GrayToGrayAlpha,
861            ]
862        }
863
864        // Oklab ↔ alpha variants.
865        (ChannelLayout::Oklab, ChannelLayout::OklabA) => vec![ConvertStep::AddAlpha],
866        (ChannelLayout::OklabA, ChannelLayout::Oklab) => vec![ConvertStep::DropAlpha],
867
868        _ => Vec::new(), // Unsupported layout conversion.
869    }
870}
871
872/// Determine the depth conversion step(s), considering transfer functions.
873///
874/// Returns one or two steps. Two steps are needed when the conversion
875/// requires going through an intermediate format (e.g. PQ U16 → sRGB U8
876/// goes PQ U16 → Linear F32 → sRGB U8).
877fn depth_steps(
878    from: ChannelType,
879    to: ChannelType,
880    from_tf: TransferFunction,
881    to_tf: TransferFunction,
882) -> Result<Vec<ConvertStep>, ConvertError> {
883    if from == to && from_tf == to_tf {
884        return Ok(Vec::new());
885    }
886
887    // Same depth, different transfer function.
888    // For integer types, TF changes are metadata-only (no math).
889    // For F32, we can apply EOTF/OETF in place.
890    if from == to && from != ChannelType::F32 {
891        return Ok(Vec::new());
892    }
893
894    if from == to && from == ChannelType::F32 {
895        return match (from_tf, to_tf) {
896            (TransferFunction::Pq, TransferFunction::Linear) => {
897                Ok(vec![ConvertStep::PqF32ToLinearF32])
898            }
899            (TransferFunction::Linear, TransferFunction::Pq) => {
900                Ok(vec![ConvertStep::LinearF32ToPqF32])
901            }
902            (TransferFunction::Hlg, TransferFunction::Linear) => {
903                Ok(vec![ConvertStep::HlgF32ToLinearF32])
904            }
905            (TransferFunction::Linear, TransferFunction::Hlg) => {
906                Ok(vec![ConvertStep::LinearF32ToHlgF32])
907            }
908            // PQ ↔ HLG: go through linear.
909            (TransferFunction::Pq, TransferFunction::Hlg) => Ok(vec![
910                ConvertStep::PqF32ToLinearF32,
911                ConvertStep::LinearF32ToHlgF32,
912            ]),
913            (TransferFunction::Hlg, TransferFunction::Pq) => Ok(vec![
914                ConvertStep::HlgF32ToLinearF32,
915                ConvertStep::LinearF32ToPqF32,
916            ]),
917            (TransferFunction::Srgb, TransferFunction::Linear) => {
918                Ok(vec![ConvertStep::SrgbF32ToLinearF32])
919            }
920            (TransferFunction::Linear, TransferFunction::Srgb) => {
921                Ok(vec![ConvertStep::LinearF32ToSrgbF32])
922            }
923            (TransferFunction::Bt709, TransferFunction::Linear) => {
924                Ok(vec![ConvertStep::Bt709F32ToLinearF32])
925            }
926            (TransferFunction::Linear, TransferFunction::Bt709) => {
927                Ok(vec![ConvertStep::LinearF32ToBt709F32])
928            }
929            // sRGB ↔ BT.709: go through linear.
930            (TransferFunction::Srgb, TransferFunction::Bt709) => Ok(vec![
931                ConvertStep::SrgbF32ToLinearF32,
932                ConvertStep::LinearF32ToBt709F32,
933            ]),
934            (TransferFunction::Bt709, TransferFunction::Srgb) => Ok(vec![
935                ConvertStep::Bt709F32ToLinearF32,
936                ConvertStep::LinearF32ToSrgbF32,
937            ]),
938            // sRGB/BT.709 ↔ PQ/HLG: go through linear.
939            (TransferFunction::Srgb, TransferFunction::Pq) => Ok(vec![
940                ConvertStep::SrgbF32ToLinearF32,
941                ConvertStep::LinearF32ToPqF32,
942            ]),
943            (TransferFunction::Srgb, TransferFunction::Hlg) => Ok(vec![
944                ConvertStep::SrgbF32ToLinearF32,
945                ConvertStep::LinearF32ToHlgF32,
946            ]),
947            (TransferFunction::Pq, TransferFunction::Srgb) => Ok(vec![
948                ConvertStep::PqF32ToLinearF32,
949                ConvertStep::LinearF32ToSrgbF32,
950            ]),
951            (TransferFunction::Hlg, TransferFunction::Srgb) => Ok(vec![
952                ConvertStep::HlgF32ToLinearF32,
953                ConvertStep::LinearF32ToSrgbF32,
954            ]),
955            (TransferFunction::Bt709, TransferFunction::Pq) => Ok(vec![
956                ConvertStep::Bt709F32ToLinearF32,
957                ConvertStep::LinearF32ToPqF32,
958            ]),
959            (TransferFunction::Bt709, TransferFunction::Hlg) => Ok(vec![
960                ConvertStep::Bt709F32ToLinearF32,
961                ConvertStep::LinearF32ToHlgF32,
962            ]),
963            (TransferFunction::Pq, TransferFunction::Bt709) => Ok(vec![
964                ConvertStep::PqF32ToLinearF32,
965                ConvertStep::LinearF32ToBt709F32,
966            ]),
967            (TransferFunction::Hlg, TransferFunction::Bt709) => Ok(vec![
968                ConvertStep::HlgF32ToLinearF32,
969                ConvertStep::LinearF32ToBt709F32,
970            ]),
971            _ => Ok(Vec::new()),
972        };
973    }
974
975    match (from, to) {
976        (ChannelType::U8, ChannelType::F32) => {
977            if (from_tf == TransferFunction::Srgb || from_tf == TransferFunction::Bt709)
978                && to_tf == TransferFunction::Linear
979            {
980                Ok(vec![ConvertStep::SrgbU8ToLinearF32])
981            } else {
982                Ok(vec![ConvertStep::NaiveU8ToF32])
983            }
984        }
985        (ChannelType::F32, ChannelType::U8) => {
986            if from_tf == TransferFunction::Linear
987                && (to_tf == TransferFunction::Srgb || to_tf == TransferFunction::Bt709)
988            {
989                Ok(vec![ConvertStep::LinearF32ToSrgbU8])
990            } else {
991                Ok(vec![ConvertStep::NaiveF32ToU8])
992            }
993        }
994        (ChannelType::U16, ChannelType::F32) => {
995            // PQ/HLG U16 → Linear F32: apply EOTF during conversion.
996            match (from_tf, to_tf) {
997                (TransferFunction::Pq, TransferFunction::Linear) => {
998                    Ok(vec![ConvertStep::PqU16ToLinearF32])
999                }
1000                (TransferFunction::Hlg, TransferFunction::Linear) => {
1001                    Ok(vec![ConvertStep::HlgU16ToLinearF32])
1002                }
1003                _ => Ok(vec![ConvertStep::U16ToF32]),
1004            }
1005        }
1006        (ChannelType::F32, ChannelType::U16) => {
1007            // Linear F32 → PQ/HLG U16: apply OETF during conversion.
1008            match (from_tf, to_tf) {
1009                (TransferFunction::Linear, TransferFunction::Pq) => {
1010                    Ok(vec![ConvertStep::LinearF32ToPqU16])
1011                }
1012                (TransferFunction::Linear, TransferFunction::Hlg) => {
1013                    Ok(vec![ConvertStep::LinearF32ToHlgU16])
1014                }
1015                _ => Ok(vec![ConvertStep::F32ToU16]),
1016            }
1017        }
1018        (ChannelType::U16, ChannelType::U8) => {
1019            // HDR U16 → SDR U8: go through linear F32 with proper EOTF → OETF.
1020            if from_tf == TransferFunction::Pq && to_tf == TransferFunction::Srgb {
1021                Ok(vec![
1022                    ConvertStep::PqU16ToLinearF32,
1023                    ConvertStep::LinearF32ToSrgbU8,
1024                ])
1025            } else if from_tf == TransferFunction::Hlg && to_tf == TransferFunction::Srgb {
1026                Ok(vec![
1027                    ConvertStep::HlgU16ToLinearF32,
1028                    ConvertStep::LinearF32ToSrgbU8,
1029                ])
1030            } else {
1031                Ok(vec![ConvertStep::U16ToU8])
1032            }
1033        }
1034        (ChannelType::U8, ChannelType::U16) => Ok(vec![ConvertStep::U8ToU16]),
1035        _ => Err(ConvertError::NoPath {
1036            from: PixelDescriptor::new(from, ChannelLayout::Rgb, None, from_tf),
1037            to: PixelDescriptor::new(to, ChannelLayout::Rgb, None, to_tf),
1038        }),
1039    }
1040}
1041
1042// ---------------------------------------------------------------------------
1043// Row conversion kernels
1044// ---------------------------------------------------------------------------
1045
1046/// Pre-allocated scratch buffer for multi-step row conversions.
1047///
1048/// Eliminates per-row heap allocation by reusing two ping-pong halves
1049/// of a single buffer across calls. Create once per [`ConvertPlan`],
1050/// then pass to `convert_row_buffered` for each row.
1051pub(crate) struct ConvertScratch {
1052    /// Single allocation split into two halves via `split_at_mut`.
1053    /// Stored as `Vec<u32>` to guarantee 4-byte alignment, which lets
1054    /// garb and bytemuck use fast aligned paths instead of unaligned fallbacks.
1055    buf: Vec<u32>,
1056}
1057
1058impl ConvertScratch {
1059    /// Create empty scratch (buffer grows on first use).
1060    pub(crate) fn new() -> Self {
1061        Self { buf: Vec::new() }
1062    }
1063
1064    /// Ensure the buffer is large enough for two halves of the max
1065    /// intermediate format at the given width.
1066    fn ensure_capacity(&mut self, plan: &ConvertPlan, width: u32) {
1067        let half_bytes = (width as usize) * plan.max_intermediate_bpp();
1068        let total_u32 = (half_bytes * 2).div_ceil(4);
1069        if self.buf.len() < total_u32 {
1070            self.buf.resize(total_u32, 0);
1071        }
1072    }
1073}
1074
1075impl core::fmt::Debug for ConvertScratch {
1076    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1077        f.debug_struct("ConvertScratch")
1078            .field("capacity", &self.buf.capacity())
1079            .finish()
1080    }
1081}
1082
1083/// Convert one row of `width` pixels using a pre-computed plan.
1084///
1085/// `src` and `dst` must be sized for `width` pixels in their respective formats.
1086/// For multi-step plans, an internal scratch buffer is allocated per call.
1087/// Prefer [`RowConverter`](crate::RowConverter) in hot loops (reuses scratch buffers).
1088pub fn convert_row(plan: &ConvertPlan, src: &[u8], dst: &mut [u8], width: u32) {
1089    if plan.is_identity() {
1090        let len = min(src.len(), dst.len());
1091        dst[..len].copy_from_slice(&src[..len]);
1092        return;
1093    }
1094
1095    if plan.steps.len() == 1 {
1096        apply_step_u8(&plan.steps[0], src, dst, width, plan.from, plan.to);
1097        return;
1098    }
1099
1100    // Allocating fallback for one-off calls.
1101    let mut scratch = ConvertScratch::new();
1102    convert_row_buffered(plan, src, dst, width, &mut scratch);
1103}
1104
1105/// Convert one row of `width` pixels, reusing pre-allocated scratch buffers.
1106///
1107/// For multi-step plans this avoids per-row heap allocation by ping-ponging
1108/// between two halves of a scratch buffer. Single-step plans bypass scratch.
1109pub(crate) fn convert_row_buffered(
1110    plan: &ConvertPlan,
1111    src: &[u8],
1112    dst: &mut [u8],
1113    width: u32,
1114    scratch: &mut ConvertScratch,
1115) {
1116    if plan.is_identity() {
1117        let len = min(src.len(), dst.len());
1118        dst[..len].copy_from_slice(&src[..len]);
1119        return;
1120    }
1121
1122    if plan.steps.len() == 1 {
1123        apply_step_u8(&plan.steps[0], src, dst, width, plan.from, plan.to);
1124        return;
1125    }
1126
1127    scratch.ensure_capacity(plan, width);
1128
1129    let buf_bytes: &mut [u8] = bytemuck::cast_slice_mut(&mut scratch.buf);
1130    let half = buf_bytes.len() / 2;
1131    let (buf_a, buf_b) = buf_bytes.split_at_mut(half);
1132
1133    let num_steps = plan.steps.len();
1134    let mut current_desc = plan.from;
1135
1136    for (i, step) in plan.steps.iter().enumerate() {
1137        let is_last = i == num_steps - 1;
1138        let next_desc = if is_last {
1139            plan.to
1140        } else {
1141            intermediate_desc(current_desc, step)
1142        };
1143
1144        let next_len = (width as usize) * next_desc.bytes_per_pixel();
1145        let curr_len = (width as usize) * current_desc.bytes_per_pixel();
1146
1147        // Ping-pong: even steps read src/buf_b and write buf_a;
1148        // odd steps read buf_a and write buf_b. Each branch only
1149        // borrows each half in one mode, satisfying the borrow checker.
1150        if i % 2 == 0 {
1151            let input = if i == 0 { src } else { &buf_b[..curr_len] };
1152            if is_last {
1153                apply_step_u8(step, input, dst, width, current_desc, next_desc);
1154            } else {
1155                apply_step_u8(
1156                    step,
1157                    input,
1158                    &mut buf_a[..next_len],
1159                    width,
1160                    current_desc,
1161                    next_desc,
1162                );
1163            }
1164        } else {
1165            let input = &buf_a[..curr_len];
1166            if is_last {
1167                apply_step_u8(step, input, dst, width, current_desc, next_desc);
1168            } else {
1169                apply_step_u8(
1170                    step,
1171                    input,
1172                    &mut buf_b[..next_len],
1173                    width,
1174                    current_desc,
1175                    next_desc,
1176                );
1177            }
1178        }
1179
1180        current_desc = next_desc;
1181    }
1182}
1183
1184/// Check if two steps are inverses that cancel each other.
1185/// Collapse `[SrgbU8ToLinearF32, GamutMatrix*F32(m), LinearF32ToSrgbU8]`
1186/// into a single fused matlut step. Mutates in place.
1187fn fuse_matlut_patterns(steps: &mut Vec<ConvertStep>) {
1188    let mut i = 0;
1189    while i + 2 < steps.len() {
1190        let rewrite = match (&steps[i], &steps[i + 1], &steps[i + 2]) {
1191            (
1192                ConvertStep::SrgbU8ToLinearF32,
1193                ConvertStep::GamutMatrixRgbF32(m),
1194                ConvertStep::LinearF32ToSrgbU8,
1195            ) => Some(ConvertStep::FusedSrgbU8GamutRgb(*m)),
1196            (
1197                ConvertStep::SrgbU8ToLinearF32,
1198                ConvertStep::GamutMatrixRgbaF32(m),
1199                ConvertStep::LinearF32ToSrgbU8,
1200            ) => Some(ConvertStep::FusedSrgbU8GamutRgba(*m)),
1201            _ => None,
1202        };
1203        if let Some(fused) = rewrite {
1204            steps[i] = fused;
1205            steps.drain(i + 1..i + 3);
1206            continue;
1207        }
1208        i += 1;
1209    }
1210}
1211
1212fn are_inverse(a: &ConvertStep, b: &ConvertStep) -> bool {
1213    matches!(
1214        (a, b),
1215        // Self-inverse
1216        (ConvertStep::SwizzleBgraRgba, ConvertStep::SwizzleBgraRgba)
1217        // Layout inverses (lossless for opaque data)
1218        | (ConvertStep::AddAlpha, ConvertStep::DropAlpha)
1219        // Transfer function f32↔f32 (exact inverses in float)
1220        | (ConvertStep::SrgbF32ToLinearF32, ConvertStep::LinearF32ToSrgbF32)
1221        | (ConvertStep::LinearF32ToSrgbF32, ConvertStep::SrgbF32ToLinearF32)
1222        | (ConvertStep::PqF32ToLinearF32, ConvertStep::LinearF32ToPqF32)
1223        | (ConvertStep::LinearF32ToPqF32, ConvertStep::PqF32ToLinearF32)
1224        | (ConvertStep::HlgF32ToLinearF32, ConvertStep::LinearF32ToHlgF32)
1225        | (ConvertStep::LinearF32ToHlgF32, ConvertStep::HlgF32ToLinearF32)
1226        | (ConvertStep::Bt709F32ToLinearF32, ConvertStep::LinearF32ToBt709F32)
1227        | (ConvertStep::LinearF32ToBt709F32, ConvertStep::Bt709F32ToLinearF32)
1228        // Alpha mode (exact inverses in float)
1229        | (ConvertStep::StraightToPremul, ConvertStep::PremulToStraight)
1230        | (ConvertStep::PremulToStraight, ConvertStep::StraightToPremul)
1231        // Color model (exact inverses in float)
1232        | (ConvertStep::LinearRgbToOklab, ConvertStep::OklabToLinearRgb)
1233        | (ConvertStep::OklabToLinearRgb, ConvertStep::LinearRgbToOklab)
1234        | (ConvertStep::LinearRgbaToOklaba, ConvertStep::OklabaToLinearRgba)
1235        | (ConvertStep::OklabaToLinearRgba, ConvertStep::LinearRgbaToOklaba)
1236        // Cross-depth pairs (near-lossless for same depth class)
1237        | (ConvertStep::NaiveU8ToF32, ConvertStep::NaiveF32ToU8)
1238        | (ConvertStep::NaiveF32ToU8, ConvertStep::NaiveU8ToF32)
1239        | (ConvertStep::U8ToU16, ConvertStep::U16ToU8)
1240        | (ConvertStep::U16ToU8, ConvertStep::U8ToU16)
1241        | (ConvertStep::U16ToF32, ConvertStep::F32ToU16)
1242        | (ConvertStep::F32ToU16, ConvertStep::U16ToF32)
1243        // Cross-depth with transfer (near-lossless roundtrip)
1244        | (ConvertStep::SrgbU8ToLinearF32, ConvertStep::LinearF32ToSrgbU8)
1245        | (ConvertStep::LinearF32ToSrgbU8, ConvertStep::SrgbU8ToLinearF32)
1246        | (ConvertStep::PqU16ToLinearF32, ConvertStep::LinearF32ToPqU16)
1247        | (ConvertStep::LinearF32ToPqU16, ConvertStep::PqU16ToLinearF32)
1248        | (ConvertStep::HlgU16ToLinearF32, ConvertStep::LinearF32ToHlgU16)
1249        | (ConvertStep::LinearF32ToHlgU16, ConvertStep::HlgU16ToLinearF32)
1250        // Extended-range sRGB f32 pairs
1251        | (ConvertStep::SrgbF32ToLinearF32Extended, ConvertStep::LinearF32ToSrgbF32Extended)
1252        | (ConvertStep::LinearF32ToSrgbF32Extended, ConvertStep::SrgbF32ToLinearF32Extended)
1253    )
1254}
1255
1256/// Compute the descriptor after applying one step.
1257fn intermediate_desc(current: PixelDescriptor, step: &ConvertStep) -> PixelDescriptor {
1258    match step {
1259        ConvertStep::Identity => current,
1260        ConvertStep::SwizzleBgraRgba => {
1261            let new_layout = match current.layout() {
1262                ChannelLayout::Bgra => ChannelLayout::Rgba,
1263                ChannelLayout::Rgba => ChannelLayout::Bgra,
1264                other => other,
1265            };
1266            PixelDescriptor::new(
1267                current.channel_type(),
1268                new_layout,
1269                current.alpha(),
1270                current.transfer(),
1271            )
1272        }
1273        ConvertStep::AddAlpha => PixelDescriptor::new(
1274            current.channel_type(),
1275            ChannelLayout::Rgba,
1276            Some(AlphaMode::Straight),
1277            current.transfer(),
1278        ),
1279        ConvertStep::RgbToBgra => PixelDescriptor::new(
1280            current.channel_type(),
1281            ChannelLayout::Bgra,
1282            Some(AlphaMode::Straight),
1283            current.transfer(),
1284        ),
1285        ConvertStep::DropAlpha | ConvertStep::MatteComposite { .. } => PixelDescriptor::new(
1286            current.channel_type(),
1287            ChannelLayout::Rgb,
1288            None,
1289            current.transfer(),
1290        ),
1291        ConvertStep::GrayToRgb => PixelDescriptor::new(
1292            current.channel_type(),
1293            ChannelLayout::Rgb,
1294            None,
1295            current.transfer(),
1296        ),
1297        ConvertStep::GrayToRgba => PixelDescriptor::new(
1298            current.channel_type(),
1299            ChannelLayout::Rgba,
1300            Some(AlphaMode::Straight),
1301            current.transfer(),
1302        ),
1303        ConvertStep::RgbToGray | ConvertStep::RgbaToGray => PixelDescriptor::new(
1304            current.channel_type(),
1305            ChannelLayout::Gray,
1306            None,
1307            current.transfer(),
1308        ),
1309        ConvertStep::GrayAlphaToRgba => PixelDescriptor::new(
1310            current.channel_type(),
1311            ChannelLayout::Rgba,
1312            current.alpha(),
1313            current.transfer(),
1314        ),
1315        ConvertStep::GrayAlphaToRgb => PixelDescriptor::new(
1316            current.channel_type(),
1317            ChannelLayout::Rgb,
1318            None,
1319            current.transfer(),
1320        ),
1321        ConvertStep::GrayToGrayAlpha => PixelDescriptor::new(
1322            current.channel_type(),
1323            ChannelLayout::GrayAlpha,
1324            Some(AlphaMode::Straight),
1325            current.transfer(),
1326        ),
1327        ConvertStep::GrayAlphaToGray => PixelDescriptor::new(
1328            current.channel_type(),
1329            ChannelLayout::Gray,
1330            None,
1331            current.transfer(),
1332        ),
1333        ConvertStep::SrgbU8ToLinearF32
1334        | ConvertStep::NaiveU8ToF32
1335        | ConvertStep::U16ToF32
1336        | ConvertStep::PqU16ToLinearF32
1337        | ConvertStep::HlgU16ToLinearF32
1338        | ConvertStep::PqF32ToLinearF32
1339        | ConvertStep::HlgF32ToLinearF32
1340        | ConvertStep::SrgbF32ToLinearF32
1341        | ConvertStep::SrgbF32ToLinearF32Extended
1342        | ConvertStep::Bt709F32ToLinearF32 => PixelDescriptor::new(
1343            ChannelType::F32,
1344            current.layout(),
1345            current.alpha(),
1346            TransferFunction::Linear,
1347        ),
1348        ConvertStep::LinearF32ToSrgbU8 | ConvertStep::NaiveF32ToU8 | ConvertStep::U16ToU8 => {
1349            PixelDescriptor::new(
1350                ChannelType::U8,
1351                current.layout(),
1352                current.alpha(),
1353                TransferFunction::Srgb,
1354            )
1355        }
1356        ConvertStep::U8ToU16 => PixelDescriptor::new(
1357            ChannelType::U16,
1358            current.layout(),
1359            current.alpha(),
1360            current.transfer(),
1361        ),
1362        ConvertStep::F32ToU16 | ConvertStep::LinearF32ToPqU16 | ConvertStep::LinearF32ToHlgU16 => {
1363            let tf = match step {
1364                ConvertStep::LinearF32ToPqU16 => TransferFunction::Pq,
1365                ConvertStep::LinearF32ToHlgU16 => TransferFunction::Hlg,
1366                _ => current.transfer(),
1367            };
1368            PixelDescriptor::new(ChannelType::U16, current.layout(), current.alpha(), tf)
1369        }
1370        ConvertStep::LinearF32ToPqF32 => PixelDescriptor::new(
1371            ChannelType::F32,
1372            current.layout(),
1373            current.alpha(),
1374            TransferFunction::Pq,
1375        ),
1376        ConvertStep::LinearF32ToHlgF32 => PixelDescriptor::new(
1377            ChannelType::F32,
1378            current.layout(),
1379            current.alpha(),
1380            TransferFunction::Hlg,
1381        ),
1382        ConvertStep::LinearF32ToSrgbF32 | ConvertStep::LinearF32ToSrgbF32Extended => {
1383            PixelDescriptor::new(
1384                ChannelType::F32,
1385                current.layout(),
1386                current.alpha(),
1387                TransferFunction::Srgb,
1388            )
1389        }
1390        ConvertStep::LinearF32ToBt709F32 => PixelDescriptor::new(
1391            ChannelType::F32,
1392            current.layout(),
1393            current.alpha(),
1394            TransferFunction::Bt709,
1395        ),
1396        ConvertStep::StraightToPremul => PixelDescriptor::new(
1397            current.channel_type(),
1398            current.layout(),
1399            Some(AlphaMode::Premultiplied),
1400            current.transfer(),
1401        ),
1402        ConvertStep::PremulToStraight => PixelDescriptor::new(
1403            current.channel_type(),
1404            current.layout(),
1405            Some(AlphaMode::Straight),
1406            current.transfer(),
1407        ),
1408        ConvertStep::LinearRgbToOklab => PixelDescriptor::new(
1409            ChannelType::F32,
1410            ChannelLayout::Oklab,
1411            None,
1412            TransferFunction::Unknown,
1413        )
1414        .with_primaries(current.primaries),
1415        ConvertStep::OklabToLinearRgb => PixelDescriptor::new(
1416            ChannelType::F32,
1417            ChannelLayout::Rgb,
1418            None,
1419            TransferFunction::Linear,
1420        )
1421        .with_primaries(current.primaries),
1422        ConvertStep::LinearRgbaToOklaba => PixelDescriptor::new(
1423            ChannelType::F32,
1424            ChannelLayout::OklabA,
1425            Some(AlphaMode::Straight),
1426            TransferFunction::Unknown,
1427        )
1428        .with_primaries(current.primaries),
1429        ConvertStep::OklabaToLinearRgba => PixelDescriptor::new(
1430            ChannelType::F32,
1431            ChannelLayout::Rgba,
1432            current.alpha(),
1433            TransferFunction::Linear,
1434        )
1435        .with_primaries(current.primaries),
1436
1437        // Gamut matrix: same depth/layout/TF, but primaries change.
1438        // The actual target primaries are embedded in the matrix, not tracked
1439        // here — we mark them as Unknown since the step doesn't carry that info.
1440        // The final plan.to descriptor has the correct primaries.
1441        ConvertStep::GamutMatrixRgbF32(_) => PixelDescriptor::new(
1442            ChannelType::F32,
1443            current.layout(),
1444            current.alpha(),
1445            TransferFunction::Linear,
1446        ),
1447        ConvertStep::GamutMatrixRgbaF32(_) => PixelDescriptor::new(
1448            ChannelType::F32,
1449            current.layout(),
1450            current.alpha(),
1451            TransferFunction::Linear,
1452        ),
1453        // Fused steps: u8 sRGB in, u8 sRGB out (same layout, same alpha).
1454        ConvertStep::FusedSrgbU8GamutRgb(_) | ConvertStep::FusedSrgbU8GamutRgba(_) => {
1455            PixelDescriptor::new(
1456                ChannelType::U8,
1457                current.layout(),
1458                current.alpha(),
1459                TransferFunction::Srgb,
1460            )
1461        }
1462        ConvertStep::FusedSrgbU16GamutRgb(_) => PixelDescriptor::new(
1463            ChannelType::U16,
1464            current.layout(),
1465            current.alpha(),
1466            TransferFunction::Srgb,
1467        ),
1468        ConvertStep::FusedSrgbU8ToLinearF32Rgb(_) => PixelDescriptor::new(
1469            ChannelType::F32,
1470            current.layout(),
1471            current.alpha(),
1472            TransferFunction::Linear,
1473        ),
1474        ConvertStep::FusedLinearF32ToSrgbU8Rgb(_) => PixelDescriptor::new(
1475            ChannelType::U8,
1476            current.layout(),
1477            current.alpha(),
1478            TransferFunction::Srgb,
1479        ),
1480    }
1481}
1482
1483#[path = "convert_kernels.rs"]
1484mod convert_kernels;
1485use convert_kernels::apply_step_u8;
1486pub(crate) use convert_kernels::{hlg_eotf, hlg_oetf, pq_eotf, pq_oetf};