zenpixels_convert/
convert.rs

1//! Row-level pixel conversion kernels.
2//!
3//! Each kernel converts one row of `width` pixels from a source format to
4//! a destination format. Individual step kernels are pure functions with
5//! no allocation. Multi-step plans use [`ConvertScratch`] ping-pong
6//! buffers to avoid per-row heap allocation in streaming loops.
7
8use alloc::vec;
9use alloc::vec::Vec;
10use core::cmp::min;
11
12use crate::policy::{AlphaPolicy, ConvertOptions, DepthPolicy};
13use crate::{
14    AlphaMode, ChannelLayout, ChannelType, ColorPrimaries, ConvertError, PixelDescriptor,
15    TransferFunction,
16};
17use whereat::{At, ResultAtExt};
18
/// Pre-computed conversion plan.
///
/// Stores the chain of steps needed to convert from one format to another.
/// Created once, applied to every row.
#[derive(Clone, Debug)]
pub struct ConvertPlan {
    /// Pixel format the plan starts from.
    pub(crate) from: PixelDescriptor,
    /// Pixel format the plan ends at.
    pub(crate) to: PixelDescriptor,
    /// Ordered chain of steps, applied first to last. The constructors in
    /// this file store a lone `ConvertStep::Identity` instead of leaving the
    /// chain empty.
    pub(crate) steps: Vec<ConvertStep>,
}
29
/// A single conversion step.
///
/// The enum is `Clone` but deliberately not `Copy`: peephole rewrites must
/// use `.clone()` or index assignment with pattern matching instead of
/// `*step` dereferences.
///
/// NOTE(review): earlier documentation justified the missing `Copy` with an
/// `ExternalTransform` variant carrying an `Arc`. No such variant exists in
/// this enum as written, so the former intra-doc link was dangling — confirm
/// whether the variant was removed or has yet to land.
#[derive(Clone)]
pub(crate) enum ConvertStep {
    /// No-op (identity).
    Identity,
    /// BGRA → RGBA byte swizzle (or vice versa).
    SwizzleBgraRgba,
    /// Add alpha channel (3ch → 4ch), filling with opaque.
    AddAlpha,
    /// Drop alpha channel (4ch → 3ch).
    DropAlpha,
    /// Composite onto solid matte color, then drop alpha (4ch → 3ch).
    ///
    /// Blends in linear light: src and matte are converted from sRGB to linear,
    /// alpha-blended, then converted back. For f32 data, pixel values are
    /// assumed already linear; only the sRGB u8 matte is linearized.
    MatteComposite { r: u8, g: u8, b: u8 },
    /// Gray → RGB (replicate gray to all 3 channels).
    GrayToRgb,
    /// Gray → RGBA (replicate + opaque alpha).
    GrayToRgba,
    /// RGB → Gray (BT.709 luma).
    RgbToGray,
    /// RGBA → Gray (BT.709 luma, drop alpha).
    RgbaToGray,
    /// GrayAlpha → RGBA (replicate gray, keep alpha).
    GrayAlphaToRgba,
    /// GrayAlpha → RGB (replicate gray, drop alpha).
    GrayAlphaToRgb,
    /// Gray → GrayAlpha (add opaque alpha).
    GrayToGrayAlpha,
    /// GrayAlpha → Gray (drop alpha).
    GrayAlphaToGray,
    /// sRGB u8 → linear f32 (per channel, EOTF).
    SrgbU8ToLinearF32,
    /// Linear f32 → sRGB u8 (per channel, OETF).
    LinearF32ToSrgbU8,
    /// Naive u8 → f32 (v / 255.0, no gamma).
    NaiveU8ToF32,
    /// Naive f32 → u8 (clamp * 255 + 0.5, no gamma).
    NaiveF32ToU8,
    /// u16 → u8 ((v * 255 + 32768) >> 16).
    U16ToU8,
    /// u8 → u16 (v * 257).
    U8ToU16,
    /// u16 → f32 (v / 65535.0).
    U16ToF32,
    /// f32 → u16 (clamp * 65535 + 0.5).
    F32ToU16,
    /// PQ (SMPTE ST 2084) u16 → linear f32 (EOTF).
    PqU16ToLinearF32,
    /// Linear f32 → PQ u16 (inverse EOTF / OETF).
    LinearF32ToPqU16,
    /// PQ f32 [0,1] → linear f32 (EOTF, no depth change).
    PqF32ToLinearF32,
    /// Linear f32 → PQ f32 [0,1] (OETF, no depth change).
    LinearF32ToPqF32,
    /// HLG (ARIB STD-B67) u16 → linear f32 (EOTF).
    HlgU16ToLinearF32,
    /// Linear f32 → HLG u16 (OETF).
    LinearF32ToHlgU16,
    /// HLG f32 [0,1] → linear f32 (EOTF, no depth change).
    HlgF32ToLinearF32,
    /// Linear f32 → HLG f32 [0,1] (OETF, no depth change).
    LinearF32ToHlgF32,
    /// sRGB f32 [0,1] → linear f32 (EOTF, no depth change). Clamps input.
    SrgbF32ToLinearF32,
    /// Linear f32 → sRGB f32 [0,1] (OETF, no depth change). Clamps output.
    LinearF32ToSrgbF32,
    /// sRGB f32 → linear f32 (EOTF, sign-preserving extended range).
    /// Emitted when `ConvertOptions::clip_out_of_gamut == false`.
    SrgbF32ToLinearF32Extended,
    /// Linear f32 → sRGB f32 (OETF, sign-preserving extended range).
    /// Emitted when `ConvertOptions::clip_out_of_gamut == false`.
    LinearF32ToSrgbF32Extended,
    /// BT.709 f32 [0,1] → linear f32 (EOTF, no depth change).
    Bt709F32ToLinearF32,
    /// Linear f32 → BT.709 f32 [0,1] (OETF, no depth change).
    LinearF32ToBt709F32,
    /// Straight → Premultiplied alpha.
    StraightToPremul,
    /// Premultiplied → Straight alpha.
    PremulToStraight,
    /// Linear RGB f32 → Oklab f32 (3-channel color model change).
    LinearRgbToOklab,
    /// Oklab f32 → Linear RGB f32 (3-channel color model change).
    OklabToLinearRgb,
    /// Linear RGBA f32 → Oklaba f32 (4-channel, alpha preserved).
    LinearRgbaToOklaba,
    /// Oklaba f32 → Linear RGBA f32 (4-channel, alpha preserved).
    OklabaToLinearRgba,
    /// Apply a 3×3 gamut matrix to linear RGB f32 (3 channels per pixel).
    ///
    /// Used for color primaries conversion (e.g., BT.709 ↔ Display P3 ↔ BT.2020).
    /// Data must be in linear light. The matrix is row-major `[[f32; 3]; 3]`
    /// flattened to `[f32; 9]`.
    GamutMatrixRgbF32([f32; 9]),
    /// Apply a 3×3 gamut matrix to linear RGBA f32 (4 channels, alpha passthrough).
    GamutMatrixRgbaF32([f32; 9]),
    /// Fused u8-sRGB RGB primaries conversion: LUT linearize → SIMD matrix →
    /// SIMD f32→i32 → LUT encode, in one pass. Replaces the 3-step sequence
    /// `[SrgbU8ToLinearF32, GamutMatrixRgbF32(m), LinearF32ToSrgbU8]`.
    FusedSrgbU8GamutRgb([f32; 9]),
    /// Fused u8-sRGB RGBA primaries conversion (alpha passthrough).
    FusedSrgbU8GamutRgba([f32; 9]),
    /// Fused u16-sRGB RGB primaries conversion via 65K-entry LUTs.
    FusedSrgbU16GamutRgb([f32; 9]),
    /// Fused u8-sRGB → linear-f32 RGB primaries conversion (cross-depth).
    /// Output preserves extended range (no clamp).
    FusedSrgbU8ToLinearF32Rgb([f32; 9]),
    /// Fused linear-f32 → u8-sRGB RGB primaries conversion (cross-depth).
    /// Always clamps since u8 can't represent out-of-gamut values.
    FusedLinearF32ToSrgbU8Rgb([f32; 9]),
}
149
150impl core::fmt::Debug for ConvertStep {
151    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
152        match self {
153            Self::Identity => f.write_str("Identity"),
154            Self::SwizzleBgraRgba => f.write_str("SwizzleBgraRgba"),
155            Self::AddAlpha => f.write_str("AddAlpha"),
156            Self::DropAlpha => f.write_str("DropAlpha"),
157            Self::MatteComposite { r, g, b } => f
158                .debug_struct("MatteComposite")
159                .field("r", r)
160                .field("g", g)
161                .field("b", b)
162                .finish(),
163            Self::GrayToRgb => f.write_str("GrayToRgb"),
164            Self::GrayToRgba => f.write_str("GrayToRgba"),
165            Self::RgbToGray => f.write_str("RgbToGray"),
166            Self::RgbaToGray => f.write_str("RgbaToGray"),
167            Self::GrayAlphaToRgba => f.write_str("GrayAlphaToRgba"),
168            Self::GrayAlphaToRgb => f.write_str("GrayAlphaToRgb"),
169            Self::GrayToGrayAlpha => f.write_str("GrayToGrayAlpha"),
170            Self::GrayAlphaToGray => f.write_str("GrayAlphaToGray"),
171            Self::SrgbU8ToLinearF32 => f.write_str("SrgbU8ToLinearF32"),
172            Self::LinearF32ToSrgbU8 => f.write_str("LinearF32ToSrgbU8"),
173            Self::NaiveU8ToF32 => f.write_str("NaiveU8ToF32"),
174            Self::NaiveF32ToU8 => f.write_str("NaiveF32ToU8"),
175            Self::U16ToU8 => f.write_str("U16ToU8"),
176            Self::U8ToU16 => f.write_str("U8ToU16"),
177            Self::U16ToF32 => f.write_str("U16ToF32"),
178            Self::F32ToU16 => f.write_str("F32ToU16"),
179            Self::PqU16ToLinearF32 => f.write_str("PqU16ToLinearF32"),
180            Self::LinearF32ToPqU16 => f.write_str("LinearF32ToPqU16"),
181            Self::PqF32ToLinearF32 => f.write_str("PqF32ToLinearF32"),
182            Self::LinearF32ToPqF32 => f.write_str("LinearF32ToPqF32"),
183            Self::HlgU16ToLinearF32 => f.write_str("HlgU16ToLinearF32"),
184            Self::LinearF32ToHlgU16 => f.write_str("LinearF32ToHlgU16"),
185            Self::HlgF32ToLinearF32 => f.write_str("HlgF32ToLinearF32"),
186            Self::LinearF32ToHlgF32 => f.write_str("LinearF32ToHlgF32"),
187            Self::SrgbF32ToLinearF32 => f.write_str("SrgbF32ToLinearF32"),
188            Self::LinearF32ToSrgbF32 => f.write_str("LinearF32ToSrgbF32"),
189            Self::SrgbF32ToLinearF32Extended => f.write_str("SrgbF32ToLinearF32Extended"),
190            Self::LinearF32ToSrgbF32Extended => f.write_str("LinearF32ToSrgbF32Extended"),
191            Self::Bt709F32ToLinearF32 => f.write_str("Bt709F32ToLinearF32"),
192            Self::LinearF32ToBt709F32 => f.write_str("LinearF32ToBt709F32"),
193            Self::StraightToPremul => f.write_str("StraightToPremul"),
194            Self::PremulToStraight => f.write_str("PremulToStraight"),
195            Self::LinearRgbToOklab => f.write_str("LinearRgbToOklab"),
196            Self::OklabToLinearRgb => f.write_str("OklabToLinearRgb"),
197            Self::LinearRgbaToOklaba => f.write_str("LinearRgbaToOklaba"),
198            Self::OklabaToLinearRgba => f.write_str("OklabaToLinearRgba"),
199            Self::GamutMatrixRgbF32(m) => f.debug_tuple("GamutMatrixRgbF32").field(m).finish(),
200            Self::GamutMatrixRgbaF32(m) => f.debug_tuple("GamutMatrixRgbaF32").field(m).finish(),
201            Self::FusedSrgbU8GamutRgb(m) => f.debug_tuple("FusedSrgbU8GamutRgb").field(m).finish(),
202            Self::FusedSrgbU8GamutRgba(m) => {
203                f.debug_tuple("FusedSrgbU8GamutRgba").field(m).finish()
204            }
205            Self::FusedSrgbU16GamutRgb(m) => {
206                f.debug_tuple("FusedSrgbU16GamutRgb").field(m).finish()
207            }
208            Self::FusedSrgbU8ToLinearF32Rgb(m) => {
209                f.debug_tuple("FusedSrgbU8ToLinearF32Rgb").field(m).finish()
210            }
211            Self::FusedLinearF32ToSrgbU8Rgb(m) => {
212                f.debug_tuple("FusedLinearF32ToSrgbU8Rgb").field(m).finish()
213            }
214        }
215    }
216}
217
218/// Assert that a descriptor is not CMYK.
219///
220/// CMYK is device-dependent and cannot be converted by zenpixels-convert.
221/// Use a CMS (e.g., moxcms) with an ICC profile for CMYK↔RGB conversion.
222fn assert_not_cmyk(desc: &PixelDescriptor) {
223    assert!(
224        desc.color_model() != crate::ColorModel::Cmyk,
225        "CMYK pixel data cannot be processed by zenpixels-convert. \
226         Use a CMS (e.g., moxcms) with an ICC profile for CMYK↔RGB conversion."
227    );
228}
229
230impl ConvertPlan {
231    /// Create a conversion plan from `from` to `to`.
232    ///
233    /// Returns `Err` if no conversion path exists.
234    ///
235    /// # Panics
236    ///
237    /// Panics if either `from` or `to` uses [`ColorModel::Cmyk`].
238    /// CMYK requires a CMS with an ICC profile for conversion.
239    #[track_caller]
240    pub fn new(from: PixelDescriptor, to: PixelDescriptor) -> Result<Self, At<ConvertError>> {
241        assert_not_cmyk(&from);
242        assert_not_cmyk(&to);
243        if from == to {
244            return Ok(Self {
245                from,
246                to,
247                steps: vec![ConvertStep::Identity],
248            });
249        }
250
251        let mut steps = Vec::with_capacity(3);
252
253        // Step 1: Layout conversion (within same depth class).
254        // Step 2: Depth conversion.
255        // Step 3: Alpha mode conversion.
256        //
257        // For cross-depth conversions, we convert layout at the source depth
258        // first, then change depth. This minimizes the number of channels
259        // we need to depth-convert.
260
261        let need_depth_change = from.channel_type() != to.channel_type();
262        let need_layout_change = from.layout() != to.layout();
263        let need_alpha_change =
264            from.alpha() != to.alpha() && from.alpha().is_some() && to.alpha().is_some();
265
266        // Depth/TF steps are needed when depth changes, or when both are F32
267        // and transfer functions differ.
268        let need_depth_or_tf = need_depth_change
269            || (from.channel_type() == ChannelType::F32 && from.transfer() != to.transfer());
270
271        // If we need to change depth AND layout, plan the optimal order.
272        if need_layout_change {
273            // When going to fewer channels, convert layout first (less depth work).
274            // When going to more channels, convert depth first (less layout work).
275            //
276            // Exception: Oklab layout steps require f32 data. When the source
277            // is integer (U8/U16) and the layout change involves Oklab, we must
278            // convert depth first regardless of channel count.
279            let src_ch = from.layout().channels();
280            let dst_ch = to.layout().channels();
281            let involves_oklab =
282                matches!(from.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA)
283                    || matches!(to.layout(), ChannelLayout::Oklab | ChannelLayout::OklabA);
284
285            // Oklab conversion requires known primaries for the RGB→LMS matrix.
286            if involves_oklab && from.primaries == ColorPrimaries::Unknown {
287                return Err(whereat::at!(ConvertError::NoPath { from, to }));
288            }
289
290            let depth_first = need_depth_or_tf
291                && (dst_ch > src_ch || (involves_oklab && from.channel_type() != ChannelType::F32));
292
293            if depth_first {
294                // Depth first, then layout.
295                steps.extend(
296                    depth_steps(
297                        from.channel_type(),
298                        to.channel_type(),
299                        from.transfer(),
300                        to.transfer(),
301                    )
302                    .map_err(|e| whereat::at!(e))?,
303                );
304                steps.extend(layout_steps(from.layout(), to.layout()));
305            } else {
306                // Layout first, then depth.
307                steps.extend(layout_steps(from.layout(), to.layout()));
308                if need_depth_or_tf {
309                    steps.extend(
310                        depth_steps(
311                            from.channel_type(),
312                            to.channel_type(),
313                            from.transfer(),
314                            to.transfer(),
315                        )
316                        .map_err(|e| whereat::at!(e))?,
317                    );
318                }
319            }
320        } else if need_depth_or_tf {
321            steps.extend(
322                depth_steps(
323                    from.channel_type(),
324                    to.channel_type(),
325                    from.transfer(),
326                    to.transfer(),
327                )
328                .map_err(|e| whereat::at!(e))?,
329            );
330        }
331
332        // Alpha mode conversion (if both have alpha and modes differ).
333        if need_alpha_change {
334            match (from.alpha(), to.alpha()) {
335                (Some(AlphaMode::Straight), Some(AlphaMode::Premultiplied)) => {
336                    steps.push(ConvertStep::StraightToPremul);
337                }
338                (Some(AlphaMode::Premultiplied), Some(AlphaMode::Straight)) => {
339                    steps.push(ConvertStep::PremulToStraight);
340                }
341                _ => {}
342            }
343        }
344
345        // Primaries conversion: if source and destination have different known
346        // primaries, inject a gamut matrix in linear f32 space.
347        let need_primaries = from.primaries != to.primaries
348            && from.primaries != ColorPrimaries::Unknown
349            && to.primaries != ColorPrimaries::Unknown;
350
351        if need_primaries
352            && let Some(matrix) = crate::gamut::conversion_matrix(from.primaries, to.primaries)
353        {
354            // Flatten the 3×3 matrix for storage in the step enum.
355            let flat = [
356                matrix[0][0],
357                matrix[0][1],
358                matrix[0][2],
359                matrix[1][0],
360                matrix[1][1],
361                matrix[1][2],
362                matrix[2][0],
363                matrix[2][1],
364                matrix[2][2],
365            ];
366
367            // The gamut matrix must be applied in linear f32 space.
368            // Check if the existing steps already go through linear f32.
369            let mut goes_through_linear = false;
370            {
371                let mut desc = from;
372                for step in &steps {
373                    desc = intermediate_desc(desc, step);
374                    if desc.channel_type() == ChannelType::F32
375                        && desc.transfer() == TransferFunction::Linear
376                    {
377                        goes_through_linear = true;
378                    }
379                }
380            }
381
382            if goes_through_linear {
383                // Insert the gamut matrix right after the first step that
384                // produces linear f32. All subsequent steps encode to the
385                // target format.
386                let mut insert_pos = 0;
387                let mut desc = from;
388                for (i, step) in steps.iter().enumerate() {
389                    desc = intermediate_desc(desc, step);
390                    if desc.channel_type() == ChannelType::F32
391                        && desc.transfer() == TransferFunction::Linear
392                    {
393                        insert_pos = i + 1;
394                        break;
395                    }
396                }
397                let gamut_step = if desc.layout().has_alpha() {
398                    ConvertStep::GamutMatrixRgbaF32(flat)
399                } else {
400                    ConvertStep::GamutMatrixRgbF32(flat)
401                };
402                steps.insert(insert_pos, gamut_step);
403            } else {
404                // No existing linear f32 step — we must add linearize → gamut → delinearize.
405                // Determine layout for the gamut step.
406                let has_alpha = from.layout().has_alpha() || to.layout().has_alpha();
407                // Use the layout at the current point in the plan.
408                let mut desc = from;
409                for step in &steps {
410                    desc = intermediate_desc(desc, step);
411                }
412                let gamut_step = if desc.layout().has_alpha() || has_alpha {
413                    ConvertStep::GamutMatrixRgbaF32(flat)
414                } else {
415                    ConvertStep::GamutMatrixRgbF32(flat)
416                };
417
418                // Insert linearize → gamut → encode-to-target-tf at the end,
419                // before any alpha mode steps.
420                let linearize = match desc.transfer() {
421                    TransferFunction::Srgb => ConvertStep::SrgbF32ToLinearF32,
422                    TransferFunction::Bt709 => ConvertStep::Bt709F32ToLinearF32,
423                    TransferFunction::Pq => ConvertStep::PqF32ToLinearF32,
424                    TransferFunction::Hlg => ConvertStep::HlgF32ToLinearF32,
425                    TransferFunction::Linear => ConvertStep::Identity,
426                    _ => ConvertStep::SrgbF32ToLinearF32, // assume sRGB for Unknown
427                };
428                let to_target_tf = match to.transfer() {
429                    TransferFunction::Srgb => ConvertStep::LinearF32ToSrgbF32,
430                    TransferFunction::Bt709 => ConvertStep::LinearF32ToBt709F32,
431                    TransferFunction::Pq => ConvertStep::LinearF32ToPqF32,
432                    TransferFunction::Hlg => ConvertStep::LinearF32ToHlgF32,
433                    TransferFunction::Linear => ConvertStep::Identity,
434                    _ => ConvertStep::LinearF32ToSrgbF32, // assume sRGB for Unknown
435                };
436
437                // Need to be in f32 first. If current is integer, add naive conversion.
438                let mut gamut_steps = Vec::new();
439                // Direct fused-step emissions for common cases.
440                if desc.channel_type() == ChannelType::U16
441                    && desc.transfer() == TransferFunction::Srgb
442                    && to.channel_type() == ChannelType::U16
443                    && to.transfer() == TransferFunction::Srgb
444                    && !desc.layout().has_alpha()
445                    && !to.layout().has_alpha()
446                {
447                    // u16 sRGB → u16 sRGB RGB: single-step matlut.
448                    gamut_steps.push(ConvertStep::FusedSrgbU16GamutRgb(flat));
449                    steps.extend(gamut_steps);
450                    if steps.is_empty() {
451                        steps.push(ConvertStep::Identity);
452                    }
453                    fuse_matlut_patterns(&mut steps);
454                    return Ok(Self { from, to, steps });
455                }
456                if desc.channel_type() == ChannelType::U8
457                    && matches!(desc.transfer(), TransferFunction::Srgb)
458                    && to.channel_type() == ChannelType::F32
459                    && to.transfer() == TransferFunction::Linear
460                    && !desc.layout().has_alpha()
461                    && !to.layout().has_alpha()
462                {
463                    // u8 sRGB → linear f32 RGB: cross-depth matlut.
464                    gamut_steps.push(ConvertStep::FusedSrgbU8ToLinearF32Rgb(flat));
465                    steps.extend(gamut_steps);
466                    if steps.is_empty() {
467                        steps.push(ConvertStep::Identity);
468                    }
469                    fuse_matlut_patterns(&mut steps);
470                    return Ok(Self { from, to, steps });
471                }
472                if desc.channel_type() == ChannelType::F32
473                    && desc.transfer() == TransferFunction::Linear
474                    && to.channel_type() == ChannelType::U8
475                    && to.transfer() == TransferFunction::Srgb
476                    && !desc.layout().has_alpha()
477                    && !to.layout().has_alpha()
478                {
479                    // linear f32 → u8 sRGB RGB: cross-depth matlut.
480                    gamut_steps.push(ConvertStep::FusedLinearF32ToSrgbU8Rgb(flat));
481                    steps.extend(gamut_steps);
482                    if steps.is_empty() {
483                        steps.push(ConvertStep::Identity);
484                    }
485                    fuse_matlut_patterns(&mut steps);
486                    return Ok(Self { from, to, steps });
487                }
488                if desc.channel_type() != ChannelType::F32 {
489                    // Use the fused sRGB u8→linear f32 if applicable.
490                    if desc.channel_type() == ChannelType::U8
491                        && matches!(
492                            desc.transfer(),
493                            TransferFunction::Srgb
494                                | TransferFunction::Bt709
495                                | TransferFunction::Unknown
496                        )
497                    {
498                        gamut_steps.push(ConvertStep::SrgbU8ToLinearF32);
499                        // Already linear, skip separate linearize.
500                        gamut_steps.push(gamut_step);
501                        gamut_steps.push(ConvertStep::LinearF32ToSrgbU8);
502                    } else if desc.channel_type() == ChannelType::U16
503                        && desc.transfer() == TransferFunction::Pq
504                    {
505                        gamut_steps.push(ConvertStep::PqU16ToLinearF32);
506                        gamut_steps.push(gamut_step);
507                        gamut_steps.push(ConvertStep::LinearF32ToPqU16);
508                    } else if desc.channel_type() == ChannelType::U16
509                        && desc.transfer() == TransferFunction::Hlg
510                    {
511                        gamut_steps.push(ConvertStep::HlgU16ToLinearF32);
512                        gamut_steps.push(gamut_step);
513                        gamut_steps.push(ConvertStep::LinearF32ToHlgU16);
514                    } else {
515                        // Generic: naive to f32, linearize, gamut, delinearize, naive back
516                        gamut_steps.push(ConvertStep::NaiveU8ToF32);
517                        if !matches!(linearize, ConvertStep::Identity) {
518                            gamut_steps.push(linearize);
519                        }
520                        gamut_steps.push(gamut_step);
521                        if !matches!(to_target_tf, ConvertStep::Identity) {
522                            gamut_steps.push(to_target_tf);
523                        }
524                        gamut_steps.push(ConvertStep::NaiveF32ToU8);
525                    }
526                } else {
527                    // Already f32, just linearize → gamut → encode
528                    if !matches!(linearize, ConvertStep::Identity) {
529                        gamut_steps.push(linearize);
530                    }
531                    gamut_steps.push(gamut_step);
532                    if !matches!(to_target_tf, ConvertStep::Identity) {
533                        gamut_steps.push(to_target_tf);
534                    }
535                }
536
537                steps.extend(gamut_steps);
538            }
539        }
540
541        if steps.is_empty() {
542            // Transfer-only difference or alpha-mode-only: identity path.
543            steps.push(ConvertStep::Identity);
544        }
545
546        // Peephole fusion: collapse common 3-step patterns into single fused
547        // kernels that avoid scratch-buffer round-trips.
548        fuse_matlut_patterns(&mut steps);
549
550        Ok(Self { from, to, steps })
551    }
552
553    /// Create a conversion plan with explicit policy enforcement.
554    ///
555    /// Validates that the planned conversion steps are allowed by the given
556    /// policies before creating the plan. Returns an error if a forbidden
557    /// operation would be required.
558    ///
559    /// # Panics
560    ///
561    /// Panics if either `from` or `to` uses [`ColorModel::Cmyk`].
562    /// CMYK requires a CMS with an ICC profile for conversion.
563    #[track_caller]
564    pub fn new_explicit(
565        from: PixelDescriptor,
566        to: PixelDescriptor,
567        options: &ConvertOptions,
568    ) -> Result<Self, At<ConvertError>> {
569        assert_not_cmyk(&from);
570        assert_not_cmyk(&to);
571        // Check alpha removal policy.
572        let drops_alpha = from.alpha().is_some() && to.alpha().is_none();
573        if drops_alpha && options.alpha_policy == AlphaPolicy::Forbid {
574            return Err(whereat::at!(ConvertError::AlphaRemovalForbidden));
575        }
576
577        // Check depth reduction policy.
578        let reduces_depth = from.channel_type().byte_size() > to.channel_type().byte_size();
579        if reduces_depth && options.depth_policy == DepthPolicy::Forbid {
580            return Err(whereat::at!(ConvertError::DepthReductionForbidden));
581        }
582
583        // Check RGB→Gray requires luma coefficients.
584        let src_is_rgb = matches!(
585            from.layout(),
586            ChannelLayout::Rgb | ChannelLayout::Rgba | ChannelLayout::Bgra
587        );
588        let dst_is_gray = matches!(to.layout(), ChannelLayout::Gray | ChannelLayout::GrayAlpha);
589        if src_is_rgb && dst_is_gray && options.luma.is_none() {
590            return Err(whereat::at!(ConvertError::RgbToGray));
591        }
592
593        let mut plan = Self::new(from, to).at()?;
594
595        // Replace DropAlpha with MatteComposite when policy is CompositeOnto.
596        if drops_alpha && let AlphaPolicy::CompositeOnto { r, g, b } = options.alpha_policy {
597            for step in &mut plan.steps {
598                if matches!(step, ConvertStep::DropAlpha) {
599                    *step = ConvertStep::MatteComposite { r, g, b };
600                }
601            }
602        }
603
604        // When the caller opts out of clipping, swap pure-f32 sRGB transfer
605        // steps for their sign-preserving extended-range counterparts.
606        // Fused u8/u16 matlut steps are unaffected (integer I/O can't
607        // represent extended range anyway).
608        if !options.clip_out_of_gamut {
609            for step in &mut plan.steps {
610                match step {
611                    ConvertStep::SrgbF32ToLinearF32 => {
612                        *step = ConvertStep::SrgbF32ToLinearF32Extended;
613                    }
614                    ConvertStep::LinearF32ToSrgbF32 => {
615                        *step = ConvertStep::LinearF32ToSrgbF32Extended;
616                    }
617                    _ => {}
618                }
619            }
620        }
621
622        Ok(plan)
623    }
624
625    /// Create a shell plan that records from/to but has no conversion steps.
626    ///
627    /// Used when an external CMS transform handles the conversion — the
628    /// plan exists only for `from()`/`to()` metadata; the actual row
629    /// work is driven by the external transform stored on `RowConverter`.
630    pub(crate) fn identity(from: PixelDescriptor, to: PixelDescriptor) -> Self {
631        Self {
632            from,
633            to,
634            steps: vec![ConvertStep::Identity],
635        }
636    }
637
638    /// Compose two plans into one: apply `self` then `other`.
639    ///
640    /// The composed plan executes both conversions in a single `convert_row`
641    /// call, using one intermediate buffer instead of two. Adjacent inverse
642    /// steps are cancelled (e.g., `SrgbU8ToLinearF32` + `LinearF32ToSrgbU8`
643    /// → identity).
644    ///
645    /// Returns `None` if `self.to` != `other.from` (incompatible plans).
646    pub fn compose(&self, other: &Self) -> Option<Self> {
647        if self.to != other.from {
648            return None;
649        }
650
651        let mut steps = self.steps.clone();
652
653        // Append other's steps, skipping its Identity if present.
654        for step in &other.steps {
655            if matches!(step, ConvertStep::Identity) {
656                continue;
657            }
658            steps.push(step.clone());
659        }
660
661        // Peephole: cancel adjacent inverse pairs.
662        let mut changed = true;
663        while changed {
664            changed = false;
665            let mut i = 0;
666            while i + 1 < steps.len() {
667                if are_inverse(&steps[i], &steps[i + 1]) {
668                    steps.remove(i + 1);
669                    steps.remove(i);
670                    changed = true;
671                    // Don't advance — check the new adjacent pair.
672                } else {
673                    i += 1;
674                }
675            }
676        }
677
678        // If everything cancelled, produce identity.
679        if steps.is_empty() {
680            steps.push(ConvertStep::Identity);
681        }
682
683        // Remove leading/trailing Identity if there are real steps.
684        if steps.len() > 1 {
685            steps.retain(|s| !matches!(s, ConvertStep::Identity));
686            if steps.is_empty() {
687                steps.push(ConvertStep::Identity);
688            }
689        }
690
691        Some(Self {
692            from: self.from,
693            to: other.to,
694            steps,
695        })
696    }
697
698    /// True if conversion is a no-op.
699    #[must_use]
700    pub fn is_identity(&self) -> bool {
701        self.steps.len() == 1 && matches!(self.steps[0], ConvertStep::Identity)
702    }
703
704    /// Maximum bytes-per-pixel across all intermediate formats in the plan.
705    ///
706    /// Used to pre-allocate scratch buffers for streaming conversion.
707    pub(crate) fn max_intermediate_bpp(&self) -> usize {
708        let mut desc = self.from;
709        let mut max_bpp = desc.bytes_per_pixel();
710        for step in &self.steps {
711            desc = intermediate_desc(desc, step);
712            max_bpp = max_bpp.max(desc.bytes_per_pixel());
713        }
714        max_bpp
715    }
716
    /// Source descriptor: the pixel format this plan converts *from*.
    ///
    /// Returned by value (a copy of the descriptor stored in the plan).
    pub fn from(&self) -> PixelDescriptor {
        self.from
    }
721
    /// Target descriptor: the pixel format this plan converts *to*.
    ///
    /// Returned by value (a copy of the descriptor stored in the plan).
    pub fn to(&self) -> PixelDescriptor {
        self.to
    }
726}
727
728/// Determine the layout conversion step(s).
729///
730/// Some layout conversions require two steps (e.g., BGRA -> RGB needs
731/// swizzle + drop alpha). Returns up to 2 steps.
732fn layout_steps(from: ChannelLayout, to: ChannelLayout) -> Vec<ConvertStep> {
733    if from == to {
734        return Vec::new();
735    }
736    match (from, to) {
737        (ChannelLayout::Bgra, ChannelLayout::Rgba) | (ChannelLayout::Rgba, ChannelLayout::Bgra) => {
738            vec![ConvertStep::SwizzleBgraRgba]
739        }
740        (ChannelLayout::Rgb, ChannelLayout::Rgba) => vec![ConvertStep::AddAlpha],
741        (ChannelLayout::Rgb, ChannelLayout::Bgra) => {
742            // Rgb -> RGBA -> BGRA: add alpha then swizzle.
743            vec![ConvertStep::AddAlpha, ConvertStep::SwizzleBgraRgba]
744        }
745        (ChannelLayout::Rgba, ChannelLayout::Rgb) => vec![ConvertStep::DropAlpha],
746        (ChannelLayout::Bgra, ChannelLayout::Rgb) => {
747            // BGRA -> RGBA -> RGB: swizzle then drop alpha.
748            vec![ConvertStep::SwizzleBgraRgba, ConvertStep::DropAlpha]
749        }
750        (ChannelLayout::Gray, ChannelLayout::Rgb) => vec![ConvertStep::GrayToRgb],
751        (ChannelLayout::Gray, ChannelLayout::Rgba) => vec![ConvertStep::GrayToRgba],
752        (ChannelLayout::Gray, ChannelLayout::Bgra) => {
753            // Gray -> RGBA -> BGRA: expand then swizzle.
754            vec![ConvertStep::GrayToRgba, ConvertStep::SwizzleBgraRgba]
755        }
756        (ChannelLayout::Rgb, ChannelLayout::Gray) => vec![ConvertStep::RgbToGray],
757        (ChannelLayout::Rgba, ChannelLayout::Gray) => vec![ConvertStep::RgbaToGray],
758        (ChannelLayout::Bgra, ChannelLayout::Gray) => {
759            // BGRA -> RGBA -> Gray: swizzle then to gray.
760            vec![ConvertStep::SwizzleBgraRgba, ConvertStep::RgbaToGray]
761        }
762        (ChannelLayout::GrayAlpha, ChannelLayout::Rgba) => vec![ConvertStep::GrayAlphaToRgba],
763        (ChannelLayout::GrayAlpha, ChannelLayout::Bgra) => {
764            // GrayAlpha -> RGBA -> BGRA: expand then swizzle.
765            vec![ConvertStep::GrayAlphaToRgba, ConvertStep::SwizzleBgraRgba]
766        }
767        (ChannelLayout::GrayAlpha, ChannelLayout::Rgb) => vec![ConvertStep::GrayAlphaToRgb],
768        (ChannelLayout::Gray, ChannelLayout::GrayAlpha) => vec![ConvertStep::GrayToGrayAlpha],
769        (ChannelLayout::GrayAlpha, ChannelLayout::Gray) => vec![ConvertStep::GrayAlphaToGray],
770
771        // Oklab ↔ RGB conversions (via linear RGB).
772        (ChannelLayout::Rgb, ChannelLayout::Oklab) => vec![ConvertStep::LinearRgbToOklab],
773        (ChannelLayout::Oklab, ChannelLayout::Rgb) => vec![ConvertStep::OklabToLinearRgb],
774        (ChannelLayout::Rgba, ChannelLayout::OklabA) => vec![ConvertStep::LinearRgbaToOklaba],
775        (ChannelLayout::OklabA, ChannelLayout::Rgba) => vec![ConvertStep::OklabaToLinearRgba],
776
777        // Oklab ↔ RGB with alpha add/drop.
778        (ChannelLayout::Rgb, ChannelLayout::OklabA) => {
779            vec![ConvertStep::AddAlpha, ConvertStep::LinearRgbaToOklaba]
780        }
781        (ChannelLayout::OklabA, ChannelLayout::Rgb) => {
782            vec![ConvertStep::OklabaToLinearRgba, ConvertStep::DropAlpha]
783        }
784        (ChannelLayout::Oklab, ChannelLayout::Rgba) => {
785            vec![ConvertStep::OklabToLinearRgb, ConvertStep::AddAlpha]
786        }
787        (ChannelLayout::Rgba, ChannelLayout::Oklab) => {
788            vec![ConvertStep::DropAlpha, ConvertStep::LinearRgbToOklab]
789        }
790
791        // Oklab ↔ BGRA (swizzle to/from RGBA, then Oklab).
792        (ChannelLayout::Bgra, ChannelLayout::OklabA) => {
793            vec![
794                ConvertStep::SwizzleBgraRgba,
795                ConvertStep::LinearRgbaToOklaba,
796            ]
797        }
798        (ChannelLayout::OklabA, ChannelLayout::Bgra) => {
799            vec![
800                ConvertStep::OklabaToLinearRgba,
801                ConvertStep::SwizzleBgraRgba,
802            ]
803        }
804        (ChannelLayout::Bgra, ChannelLayout::Oklab) => {
805            vec![
806                ConvertStep::SwizzleBgraRgba,
807                ConvertStep::DropAlpha,
808                ConvertStep::LinearRgbToOklab,
809            ]
810        }
811        (ChannelLayout::Oklab, ChannelLayout::Bgra) => {
812            vec![
813                ConvertStep::OklabToLinearRgb,
814                ConvertStep::AddAlpha,
815                ConvertStep::SwizzleBgraRgba,
816            ]
817        }
818
819        // Gray ↔ Oklab (expand gray to RGB first).
820        (ChannelLayout::Gray, ChannelLayout::Oklab) => {
821            vec![ConvertStep::GrayToRgb, ConvertStep::LinearRgbToOklab]
822        }
823        (ChannelLayout::Oklab, ChannelLayout::Gray) => {
824            vec![ConvertStep::OklabToLinearRgb, ConvertStep::RgbToGray]
825        }
826        (ChannelLayout::Gray, ChannelLayout::OklabA) => {
827            vec![ConvertStep::GrayToRgba, ConvertStep::LinearRgbaToOklaba]
828        }
829        (ChannelLayout::OklabA, ChannelLayout::Gray) => {
830            vec![ConvertStep::OklabaToLinearRgba, ConvertStep::RgbaToGray]
831        }
832        (ChannelLayout::GrayAlpha, ChannelLayout::OklabA) => {
833            vec![
834                ConvertStep::GrayAlphaToRgba,
835                ConvertStep::LinearRgbaToOklaba,
836            ]
837        }
838        (ChannelLayout::OklabA, ChannelLayout::GrayAlpha) => {
839            // Drop alpha from OklabA→Oklab, convert to RGB, then to GrayAlpha.
840            // Alpha is lost; this is inherently lossy.
841            vec![
842                ConvertStep::OklabaToLinearRgba,
843                ConvertStep::RgbaToGray,
844                ConvertStep::GrayToGrayAlpha,
845            ]
846        }
847        (ChannelLayout::GrayAlpha, ChannelLayout::Oklab) => {
848            vec![ConvertStep::GrayAlphaToRgb, ConvertStep::LinearRgbToOklab]
849        }
850        (ChannelLayout::Oklab, ChannelLayout::GrayAlpha) => {
851            vec![
852                ConvertStep::OklabToLinearRgb,
853                ConvertStep::RgbToGray,
854                ConvertStep::GrayToGrayAlpha,
855            ]
856        }
857
858        // Oklab ↔ alpha variants.
859        (ChannelLayout::Oklab, ChannelLayout::OklabA) => vec![ConvertStep::AddAlpha],
860        (ChannelLayout::OklabA, ChannelLayout::Oklab) => vec![ConvertStep::DropAlpha],
861
862        _ => Vec::new(), // Unsupported layout conversion.
863    }
864}
865
866/// Determine the depth conversion step(s), considering transfer functions.
867///
868/// Returns one or two steps. Two steps are needed when the conversion
869/// requires going through an intermediate format (e.g. PQ U16 → sRGB U8
870/// goes PQ U16 → Linear F32 → sRGB U8).
871fn depth_steps(
872    from: ChannelType,
873    to: ChannelType,
874    from_tf: TransferFunction,
875    to_tf: TransferFunction,
876) -> Result<Vec<ConvertStep>, ConvertError> {
877    if from == to && from_tf == to_tf {
878        return Ok(Vec::new());
879    }
880
881    // Same depth, different transfer function.
882    // For integer types, TF changes are metadata-only (no math).
883    // For F32, we can apply EOTF/OETF in place.
884    if from == to && from != ChannelType::F32 {
885        return Ok(Vec::new());
886    }
887
888    if from == to && from == ChannelType::F32 {
889        return match (from_tf, to_tf) {
890            (TransferFunction::Pq, TransferFunction::Linear) => {
891                Ok(vec![ConvertStep::PqF32ToLinearF32])
892            }
893            (TransferFunction::Linear, TransferFunction::Pq) => {
894                Ok(vec![ConvertStep::LinearF32ToPqF32])
895            }
896            (TransferFunction::Hlg, TransferFunction::Linear) => {
897                Ok(vec![ConvertStep::HlgF32ToLinearF32])
898            }
899            (TransferFunction::Linear, TransferFunction::Hlg) => {
900                Ok(vec![ConvertStep::LinearF32ToHlgF32])
901            }
902            // PQ ↔ HLG: go through linear.
903            (TransferFunction::Pq, TransferFunction::Hlg) => Ok(vec![
904                ConvertStep::PqF32ToLinearF32,
905                ConvertStep::LinearF32ToHlgF32,
906            ]),
907            (TransferFunction::Hlg, TransferFunction::Pq) => Ok(vec![
908                ConvertStep::HlgF32ToLinearF32,
909                ConvertStep::LinearF32ToPqF32,
910            ]),
911            (TransferFunction::Srgb, TransferFunction::Linear) => {
912                Ok(vec![ConvertStep::SrgbF32ToLinearF32])
913            }
914            (TransferFunction::Linear, TransferFunction::Srgb) => {
915                Ok(vec![ConvertStep::LinearF32ToSrgbF32])
916            }
917            (TransferFunction::Bt709, TransferFunction::Linear) => {
918                Ok(vec![ConvertStep::Bt709F32ToLinearF32])
919            }
920            (TransferFunction::Linear, TransferFunction::Bt709) => {
921                Ok(vec![ConvertStep::LinearF32ToBt709F32])
922            }
923            // sRGB ↔ BT.709: go through linear.
924            (TransferFunction::Srgb, TransferFunction::Bt709) => Ok(vec![
925                ConvertStep::SrgbF32ToLinearF32,
926                ConvertStep::LinearF32ToBt709F32,
927            ]),
928            (TransferFunction::Bt709, TransferFunction::Srgb) => Ok(vec![
929                ConvertStep::Bt709F32ToLinearF32,
930                ConvertStep::LinearF32ToSrgbF32,
931            ]),
932            // sRGB/BT.709 ↔ PQ/HLG: go through linear.
933            (TransferFunction::Srgb, TransferFunction::Pq) => Ok(vec![
934                ConvertStep::SrgbF32ToLinearF32,
935                ConvertStep::LinearF32ToPqF32,
936            ]),
937            (TransferFunction::Srgb, TransferFunction::Hlg) => Ok(vec![
938                ConvertStep::SrgbF32ToLinearF32,
939                ConvertStep::LinearF32ToHlgF32,
940            ]),
941            (TransferFunction::Pq, TransferFunction::Srgb) => Ok(vec![
942                ConvertStep::PqF32ToLinearF32,
943                ConvertStep::LinearF32ToSrgbF32,
944            ]),
945            (TransferFunction::Hlg, TransferFunction::Srgb) => Ok(vec![
946                ConvertStep::HlgF32ToLinearF32,
947                ConvertStep::LinearF32ToSrgbF32,
948            ]),
949            (TransferFunction::Bt709, TransferFunction::Pq) => Ok(vec![
950                ConvertStep::Bt709F32ToLinearF32,
951                ConvertStep::LinearF32ToPqF32,
952            ]),
953            (TransferFunction::Bt709, TransferFunction::Hlg) => Ok(vec![
954                ConvertStep::Bt709F32ToLinearF32,
955                ConvertStep::LinearF32ToHlgF32,
956            ]),
957            (TransferFunction::Pq, TransferFunction::Bt709) => Ok(vec![
958                ConvertStep::PqF32ToLinearF32,
959                ConvertStep::LinearF32ToBt709F32,
960            ]),
961            (TransferFunction::Hlg, TransferFunction::Bt709) => Ok(vec![
962                ConvertStep::HlgF32ToLinearF32,
963                ConvertStep::LinearF32ToBt709F32,
964            ]),
965            _ => Ok(Vec::new()),
966        };
967    }
968
969    match (from, to) {
970        (ChannelType::U8, ChannelType::F32) => {
971            if (from_tf == TransferFunction::Srgb || from_tf == TransferFunction::Bt709)
972                && to_tf == TransferFunction::Linear
973            {
974                Ok(vec![ConvertStep::SrgbU8ToLinearF32])
975            } else {
976                Ok(vec![ConvertStep::NaiveU8ToF32])
977            }
978        }
979        (ChannelType::F32, ChannelType::U8) => {
980            if from_tf == TransferFunction::Linear
981                && (to_tf == TransferFunction::Srgb || to_tf == TransferFunction::Bt709)
982            {
983                Ok(vec![ConvertStep::LinearF32ToSrgbU8])
984            } else {
985                Ok(vec![ConvertStep::NaiveF32ToU8])
986            }
987        }
988        (ChannelType::U16, ChannelType::F32) => {
989            // PQ/HLG U16 → Linear F32: apply EOTF during conversion.
990            match (from_tf, to_tf) {
991                (TransferFunction::Pq, TransferFunction::Linear) => {
992                    Ok(vec![ConvertStep::PqU16ToLinearF32])
993                }
994                (TransferFunction::Hlg, TransferFunction::Linear) => {
995                    Ok(vec![ConvertStep::HlgU16ToLinearF32])
996                }
997                _ => Ok(vec![ConvertStep::U16ToF32]),
998            }
999        }
1000        (ChannelType::F32, ChannelType::U16) => {
1001            // Linear F32 → PQ/HLG U16: apply OETF during conversion.
1002            match (from_tf, to_tf) {
1003                (TransferFunction::Linear, TransferFunction::Pq) => {
1004                    Ok(vec![ConvertStep::LinearF32ToPqU16])
1005                }
1006                (TransferFunction::Linear, TransferFunction::Hlg) => {
1007                    Ok(vec![ConvertStep::LinearF32ToHlgU16])
1008                }
1009                _ => Ok(vec![ConvertStep::F32ToU16]),
1010            }
1011        }
1012        (ChannelType::U16, ChannelType::U8) => {
1013            // HDR U16 → SDR U8: go through linear F32 with proper EOTF → OETF.
1014            if from_tf == TransferFunction::Pq && to_tf == TransferFunction::Srgb {
1015                Ok(vec![
1016                    ConvertStep::PqU16ToLinearF32,
1017                    ConvertStep::LinearF32ToSrgbU8,
1018                ])
1019            } else if from_tf == TransferFunction::Hlg && to_tf == TransferFunction::Srgb {
1020                Ok(vec![
1021                    ConvertStep::HlgU16ToLinearF32,
1022                    ConvertStep::LinearF32ToSrgbU8,
1023                ])
1024            } else {
1025                Ok(vec![ConvertStep::U16ToU8])
1026            }
1027        }
1028        (ChannelType::U8, ChannelType::U16) => Ok(vec![ConvertStep::U8ToU16]),
1029        _ => Err(ConvertError::NoPath {
1030            from: PixelDescriptor::new(from, ChannelLayout::Rgb, None, from_tf),
1031            to: PixelDescriptor::new(to, ChannelLayout::Rgb, None, to_tf),
1032        }),
1033    }
1034}
1035
1036// ---------------------------------------------------------------------------
1037// Row conversion kernels
1038// ---------------------------------------------------------------------------
1039
/// Pre-allocated scratch buffer for multi-step row conversions.
///
/// Eliminates per-row heap allocation by reusing two ping-pong halves
/// of a single buffer across calls. Create once per [`ConvertPlan`],
/// then pass to `convert_row_buffered` for each row.
///
/// The buffer starts empty and is only ever grown (never shrunk) by
/// `ensure_capacity`, so after the widest row has been seen no further
/// allocation takes place.
pub(crate) struct ConvertScratch {
    /// Single allocation split into two halves via `split_at_mut`.
    /// Stored as `Vec<u32>` to guarantee 4-byte alignment, which lets
    /// garb and bytemuck use fast aligned paths instead of unaligned fallbacks.
    /// It is viewed as bytes (through `bytemuck::cast_slice_mut`) when used.
    buf: Vec<u32>,
}
1051
1052impl ConvertScratch {
1053    /// Create empty scratch (buffer grows on first use).
1054    pub(crate) fn new() -> Self {
1055        Self { buf: Vec::new() }
1056    }
1057
1058    /// Ensure the buffer is large enough for two halves of the max
1059    /// intermediate format at the given width.
1060    fn ensure_capacity(&mut self, plan: &ConvertPlan, width: u32) {
1061        let half_bytes = (width as usize) * plan.max_intermediate_bpp();
1062        let total_u32 = (half_bytes * 2).div_ceil(4);
1063        if self.buf.len() < total_u32 {
1064            self.buf.resize(total_u32, 0);
1065        }
1066    }
1067}
1068
1069impl core::fmt::Debug for ConvertScratch {
1070    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1071        f.debug_struct("ConvertScratch")
1072            .field("capacity", &self.buf.capacity())
1073            .finish()
1074    }
1075}
1076
1077/// Convert one row of `width` pixels using a pre-computed plan.
1078///
1079/// `src` and `dst` must be sized for `width` pixels in their respective formats.
1080/// For multi-step plans, an internal scratch buffer is allocated per call.
1081/// Prefer [`RowConverter`](crate::RowConverter) in hot loops (reuses scratch buffers).
1082pub fn convert_row(plan: &ConvertPlan, src: &[u8], dst: &mut [u8], width: u32) {
1083    if plan.is_identity() {
1084        let len = min(src.len(), dst.len());
1085        dst[..len].copy_from_slice(&src[..len]);
1086        return;
1087    }
1088
1089    if plan.steps.len() == 1 {
1090        apply_step_u8(&plan.steps[0], src, dst, width, plan.from, plan.to);
1091        return;
1092    }
1093
1094    // Allocating fallback for one-off calls.
1095    let mut scratch = ConvertScratch::new();
1096    convert_row_buffered(plan, src, dst, width, &mut scratch);
1097}
1098
1099/// Convert one row of `width` pixels, reusing pre-allocated scratch buffers.
1100///
1101/// For multi-step plans this avoids per-row heap allocation by ping-ponging
1102/// between two halves of a scratch buffer. Single-step plans bypass scratch.
1103pub(crate) fn convert_row_buffered(
1104    plan: &ConvertPlan,
1105    src: &[u8],
1106    dst: &mut [u8],
1107    width: u32,
1108    scratch: &mut ConvertScratch,
1109) {
1110    if plan.is_identity() {
1111        let len = min(src.len(), dst.len());
1112        dst[..len].copy_from_slice(&src[..len]);
1113        return;
1114    }
1115
1116    if plan.steps.len() == 1 {
1117        apply_step_u8(&plan.steps[0], src, dst, width, plan.from, plan.to);
1118        return;
1119    }
1120
1121    scratch.ensure_capacity(plan, width);
1122
1123    let buf_bytes: &mut [u8] = bytemuck::cast_slice_mut(&mut scratch.buf);
1124    let half = buf_bytes.len() / 2;
1125    let (buf_a, buf_b) = buf_bytes.split_at_mut(half);
1126
1127    let num_steps = plan.steps.len();
1128    let mut current_desc = plan.from;
1129
1130    for (i, step) in plan.steps.iter().enumerate() {
1131        let is_last = i == num_steps - 1;
1132        let next_desc = if is_last {
1133            plan.to
1134        } else {
1135            intermediate_desc(current_desc, step)
1136        };
1137
1138        let next_len = (width as usize) * next_desc.bytes_per_pixel();
1139        let curr_len = (width as usize) * current_desc.bytes_per_pixel();
1140
1141        // Ping-pong: even steps read src/buf_b and write buf_a;
1142        // odd steps read buf_a and write buf_b. Each branch only
1143        // borrows each half in one mode, satisfying the borrow checker.
1144        if i % 2 == 0 {
1145            let input = if i == 0 { src } else { &buf_b[..curr_len] };
1146            if is_last {
1147                apply_step_u8(step, input, dst, width, current_desc, next_desc);
1148            } else {
1149                apply_step_u8(
1150                    step,
1151                    input,
1152                    &mut buf_a[..next_len],
1153                    width,
1154                    current_desc,
1155                    next_desc,
1156                );
1157            }
1158        } else {
1159            let input = &buf_a[..curr_len];
1160            if is_last {
1161                apply_step_u8(step, input, dst, width, current_desc, next_desc);
1162            } else {
1163                apply_step_u8(
1164                    step,
1165                    input,
1166                    &mut buf_b[..next_len],
1167                    width,
1168                    current_desc,
1169                    next_desc,
1170                );
1171            }
1172        }
1173
1174        current_desc = next_desc;
1175    }
1176}
1177
1178/// Check if two steps are inverses that cancel each other.
1179/// Collapse `[SrgbU8ToLinearF32, GamutMatrix*F32(m), LinearF32ToSrgbU8]`
1180/// into a single fused matlut step. Mutates in place.
1181fn fuse_matlut_patterns(steps: &mut Vec<ConvertStep>) {
1182    let mut i = 0;
1183    while i + 2 < steps.len() {
1184        let rewrite = match (&steps[i], &steps[i + 1], &steps[i + 2]) {
1185            (
1186                ConvertStep::SrgbU8ToLinearF32,
1187                ConvertStep::GamutMatrixRgbF32(m),
1188                ConvertStep::LinearF32ToSrgbU8,
1189            ) => Some(ConvertStep::FusedSrgbU8GamutRgb(*m)),
1190            (
1191                ConvertStep::SrgbU8ToLinearF32,
1192                ConvertStep::GamutMatrixRgbaF32(m),
1193                ConvertStep::LinearF32ToSrgbU8,
1194            ) => Some(ConvertStep::FusedSrgbU8GamutRgba(*m)),
1195            _ => None,
1196        };
1197        if let Some(fused) = rewrite {
1198            steps[i] = fused;
1199            steps.drain(i + 1..i + 3);
1200            continue;
1201        }
1202        i += 1;
1203    }
1204}
1205
1206fn are_inverse(a: &ConvertStep, b: &ConvertStep) -> bool {
1207    matches!(
1208        (a, b),
1209        // Self-inverse
1210        (ConvertStep::SwizzleBgraRgba, ConvertStep::SwizzleBgraRgba)
1211        // Layout inverses (lossless for opaque data)
1212        | (ConvertStep::AddAlpha, ConvertStep::DropAlpha)
1213        // Transfer function f32↔f32 (exact inverses in float)
1214        | (ConvertStep::SrgbF32ToLinearF32, ConvertStep::LinearF32ToSrgbF32)
1215        | (ConvertStep::LinearF32ToSrgbF32, ConvertStep::SrgbF32ToLinearF32)
1216        | (ConvertStep::PqF32ToLinearF32, ConvertStep::LinearF32ToPqF32)
1217        | (ConvertStep::LinearF32ToPqF32, ConvertStep::PqF32ToLinearF32)
1218        | (ConvertStep::HlgF32ToLinearF32, ConvertStep::LinearF32ToHlgF32)
1219        | (ConvertStep::LinearF32ToHlgF32, ConvertStep::HlgF32ToLinearF32)
1220        | (ConvertStep::Bt709F32ToLinearF32, ConvertStep::LinearF32ToBt709F32)
1221        | (ConvertStep::LinearF32ToBt709F32, ConvertStep::Bt709F32ToLinearF32)
1222        // Alpha mode (exact inverses in float)
1223        | (ConvertStep::StraightToPremul, ConvertStep::PremulToStraight)
1224        | (ConvertStep::PremulToStraight, ConvertStep::StraightToPremul)
1225        // Color model (exact inverses in float)
1226        | (ConvertStep::LinearRgbToOklab, ConvertStep::OklabToLinearRgb)
1227        | (ConvertStep::OklabToLinearRgb, ConvertStep::LinearRgbToOklab)
1228        | (ConvertStep::LinearRgbaToOklaba, ConvertStep::OklabaToLinearRgba)
1229        | (ConvertStep::OklabaToLinearRgba, ConvertStep::LinearRgbaToOklaba)
1230        // Cross-depth pairs (near-lossless for same depth class)
1231        | (ConvertStep::NaiveU8ToF32, ConvertStep::NaiveF32ToU8)
1232        | (ConvertStep::NaiveF32ToU8, ConvertStep::NaiveU8ToF32)
1233        | (ConvertStep::U8ToU16, ConvertStep::U16ToU8)
1234        | (ConvertStep::U16ToU8, ConvertStep::U8ToU16)
1235        | (ConvertStep::U16ToF32, ConvertStep::F32ToU16)
1236        | (ConvertStep::F32ToU16, ConvertStep::U16ToF32)
1237        // Cross-depth with transfer (near-lossless roundtrip)
1238        | (ConvertStep::SrgbU8ToLinearF32, ConvertStep::LinearF32ToSrgbU8)
1239        | (ConvertStep::LinearF32ToSrgbU8, ConvertStep::SrgbU8ToLinearF32)
1240        | (ConvertStep::PqU16ToLinearF32, ConvertStep::LinearF32ToPqU16)
1241        | (ConvertStep::LinearF32ToPqU16, ConvertStep::PqU16ToLinearF32)
1242        | (ConvertStep::HlgU16ToLinearF32, ConvertStep::LinearF32ToHlgU16)
1243        | (ConvertStep::LinearF32ToHlgU16, ConvertStep::HlgU16ToLinearF32)
1244        // Extended-range sRGB f32 pairs
1245        | (ConvertStep::SrgbF32ToLinearF32Extended, ConvertStep::LinearF32ToSrgbF32Extended)
1246        | (ConvertStep::LinearF32ToSrgbF32Extended, ConvertStep::SrgbF32ToLinearF32Extended)
1247    )
1248}
1249
1250/// Compute the descriptor after applying one step.
1251fn intermediate_desc(current: PixelDescriptor, step: &ConvertStep) -> PixelDescriptor {
1252    match step {
1253        ConvertStep::Identity => current,
1254        ConvertStep::SwizzleBgraRgba => {
1255            let new_layout = match current.layout() {
1256                ChannelLayout::Bgra => ChannelLayout::Rgba,
1257                ChannelLayout::Rgba => ChannelLayout::Bgra,
1258                other => other,
1259            };
1260            PixelDescriptor::new(
1261                current.channel_type(),
1262                new_layout,
1263                current.alpha(),
1264                current.transfer(),
1265            )
1266        }
1267        ConvertStep::AddAlpha => PixelDescriptor::new(
1268            current.channel_type(),
1269            ChannelLayout::Rgba,
1270            Some(AlphaMode::Straight),
1271            current.transfer(),
1272        ),
1273        ConvertStep::DropAlpha | ConvertStep::MatteComposite { .. } => PixelDescriptor::new(
1274            current.channel_type(),
1275            ChannelLayout::Rgb,
1276            None,
1277            current.transfer(),
1278        ),
1279        ConvertStep::GrayToRgb => PixelDescriptor::new(
1280            current.channel_type(),
1281            ChannelLayout::Rgb,
1282            None,
1283            current.transfer(),
1284        ),
1285        ConvertStep::GrayToRgba => PixelDescriptor::new(
1286            current.channel_type(),
1287            ChannelLayout::Rgba,
1288            Some(AlphaMode::Straight),
1289            current.transfer(),
1290        ),
1291        ConvertStep::RgbToGray | ConvertStep::RgbaToGray => PixelDescriptor::new(
1292            current.channel_type(),
1293            ChannelLayout::Gray,
1294            None,
1295            current.transfer(),
1296        ),
1297        ConvertStep::GrayAlphaToRgba => PixelDescriptor::new(
1298            current.channel_type(),
1299            ChannelLayout::Rgba,
1300            current.alpha(),
1301            current.transfer(),
1302        ),
1303        ConvertStep::GrayAlphaToRgb => PixelDescriptor::new(
1304            current.channel_type(),
1305            ChannelLayout::Rgb,
1306            None,
1307            current.transfer(),
1308        ),
1309        ConvertStep::GrayToGrayAlpha => PixelDescriptor::new(
1310            current.channel_type(),
1311            ChannelLayout::GrayAlpha,
1312            Some(AlphaMode::Straight),
1313            current.transfer(),
1314        ),
1315        ConvertStep::GrayAlphaToGray => PixelDescriptor::new(
1316            current.channel_type(),
1317            ChannelLayout::Gray,
1318            None,
1319            current.transfer(),
1320        ),
1321        ConvertStep::SrgbU8ToLinearF32
1322        | ConvertStep::NaiveU8ToF32
1323        | ConvertStep::U16ToF32
1324        | ConvertStep::PqU16ToLinearF32
1325        | ConvertStep::HlgU16ToLinearF32
1326        | ConvertStep::PqF32ToLinearF32
1327        | ConvertStep::HlgF32ToLinearF32
1328        | ConvertStep::SrgbF32ToLinearF32
1329        | ConvertStep::SrgbF32ToLinearF32Extended
1330        | ConvertStep::Bt709F32ToLinearF32 => PixelDescriptor::new(
1331            ChannelType::F32,
1332            current.layout(),
1333            current.alpha(),
1334            TransferFunction::Linear,
1335        ),
1336        ConvertStep::LinearF32ToSrgbU8 | ConvertStep::NaiveF32ToU8 | ConvertStep::U16ToU8 => {
1337            PixelDescriptor::new(
1338                ChannelType::U8,
1339                current.layout(),
1340                current.alpha(),
1341                TransferFunction::Srgb,
1342            )
1343        }
1344        ConvertStep::U8ToU16 => PixelDescriptor::new(
1345            ChannelType::U16,
1346            current.layout(),
1347            current.alpha(),
1348            current.transfer(),
1349        ),
1350        ConvertStep::F32ToU16 | ConvertStep::LinearF32ToPqU16 | ConvertStep::LinearF32ToHlgU16 => {
1351            let tf = match step {
1352                ConvertStep::LinearF32ToPqU16 => TransferFunction::Pq,
1353                ConvertStep::LinearF32ToHlgU16 => TransferFunction::Hlg,
1354                _ => current.transfer(),
1355            };
1356            PixelDescriptor::new(ChannelType::U16, current.layout(), current.alpha(), tf)
1357        }
1358        ConvertStep::LinearF32ToPqF32 => PixelDescriptor::new(
1359            ChannelType::F32,
1360            current.layout(),
1361            current.alpha(),
1362            TransferFunction::Pq,
1363        ),
1364        ConvertStep::LinearF32ToHlgF32 => PixelDescriptor::new(
1365            ChannelType::F32,
1366            current.layout(),
1367            current.alpha(),
1368            TransferFunction::Hlg,
1369        ),
1370        ConvertStep::LinearF32ToSrgbF32 | ConvertStep::LinearF32ToSrgbF32Extended => {
1371            PixelDescriptor::new(
1372                ChannelType::F32,
1373                current.layout(),
1374                current.alpha(),
1375                TransferFunction::Srgb,
1376            )
1377        }
1378        ConvertStep::LinearF32ToBt709F32 => PixelDescriptor::new(
1379            ChannelType::F32,
1380            current.layout(),
1381            current.alpha(),
1382            TransferFunction::Bt709,
1383        ),
1384        ConvertStep::StraightToPremul => PixelDescriptor::new(
1385            current.channel_type(),
1386            current.layout(),
1387            Some(AlphaMode::Premultiplied),
1388            current.transfer(),
1389        ),
1390        ConvertStep::PremulToStraight => PixelDescriptor::new(
1391            current.channel_type(),
1392            current.layout(),
1393            Some(AlphaMode::Straight),
1394            current.transfer(),
1395        ),
1396        ConvertStep::LinearRgbToOklab => PixelDescriptor::new(
1397            ChannelType::F32,
1398            ChannelLayout::Oklab,
1399            None,
1400            TransferFunction::Unknown,
1401        )
1402        .with_primaries(current.primaries),
1403        ConvertStep::OklabToLinearRgb => PixelDescriptor::new(
1404            ChannelType::F32,
1405            ChannelLayout::Rgb,
1406            None,
1407            TransferFunction::Linear,
1408        )
1409        .with_primaries(current.primaries),
1410        ConvertStep::LinearRgbaToOklaba => PixelDescriptor::new(
1411            ChannelType::F32,
1412            ChannelLayout::OklabA,
1413            Some(AlphaMode::Straight),
1414            TransferFunction::Unknown,
1415        )
1416        .with_primaries(current.primaries),
1417        ConvertStep::OklabaToLinearRgba => PixelDescriptor::new(
1418            ChannelType::F32,
1419            ChannelLayout::Rgba,
1420            current.alpha(),
1421            TransferFunction::Linear,
1422        )
1423        .with_primaries(current.primaries),
1424
1425        // Gamut matrix: same depth/layout/TF, but primaries change.
1426        // The actual target primaries are embedded in the matrix, not tracked
1427        // here — we mark them as Unknown since the step doesn't carry that info.
1428        // The final plan.to descriptor has the correct primaries.
1429        ConvertStep::GamutMatrixRgbF32(_) => PixelDescriptor::new(
1430            ChannelType::F32,
1431            current.layout(),
1432            current.alpha(),
1433            TransferFunction::Linear,
1434        ),
1435        ConvertStep::GamutMatrixRgbaF32(_) => PixelDescriptor::new(
1436            ChannelType::F32,
1437            current.layout(),
1438            current.alpha(),
1439            TransferFunction::Linear,
1440        ),
1441        // Fused steps: u8 sRGB in, u8 sRGB out (same layout, same alpha).
1442        ConvertStep::FusedSrgbU8GamutRgb(_) | ConvertStep::FusedSrgbU8GamutRgba(_) => {
1443            PixelDescriptor::new(
1444                ChannelType::U8,
1445                current.layout(),
1446                current.alpha(),
1447                TransferFunction::Srgb,
1448            )
1449        }
1450        ConvertStep::FusedSrgbU16GamutRgb(_) => PixelDescriptor::new(
1451            ChannelType::U16,
1452            current.layout(),
1453            current.alpha(),
1454            TransferFunction::Srgb,
1455        ),
1456        ConvertStep::FusedSrgbU8ToLinearF32Rgb(_) => PixelDescriptor::new(
1457            ChannelType::F32,
1458            current.layout(),
1459            current.alpha(),
1460            TransferFunction::Linear,
1461        ),
1462        ConvertStep::FusedLinearF32ToSrgbU8Rgb(_) => PixelDescriptor::new(
1463            ChannelType::U8,
1464            current.layout(),
1465            current.alpha(),
1466            TransferFunction::Srgb,
1467        ),
1468    }
1469}
1470
1471#[path = "convert_kernels.rs"]
1472mod convert_kernels;
1473use convert_kernels::apply_step_u8;
1474pub(crate) use convert_kernels::{hlg_eotf, hlg_oetf, pq_eotf, pq_oetf};
zenpixels_convert/convert.rs

zenpixels_convert/
convert.rs