Skip to main content

hayro_jpeg2000/
lib.rs

1/*!
2A memory-safe, pure-Rust JPEG 2000 decoder.
3
4`hayro-jpeg2000` can decode both raw JPEG 2000 codestreams (`.j2c`) and images wrapped
5inside the JP2 container format. The decoder supports the vast majority of features
6defined in the JPEG2000 core coding system (ISO/IEC 15444-1) as well as some color
7spaces from the extensions (ISO/IEC 15444-2). There are still some missing pieces
for some "obscure" features (like, for example, support for progression order
9changes in tile-parts), but all features that actually commonly appear in real-life
10images should be supported (if not, please open an issue!).
11
12The decoder abstracts away most of the internal complexity of JPEG2000
13and yields a simple 8-bit image with either greyscale, RGB, CMYK or an ICC-based
14color space, which can then be processed further according to your needs.
15
16# Example
17```rust,no_run
18use hayro_jpeg2000::{Image, DecodeSettings};
19
20let data = std::fs::read("image.jp2").unwrap();
21let image = Image::new(&data, &DecodeSettings::default()).unwrap();
22
23println!(
24    "{}x{} image in {:?} with alpha={}",
25    image.width(),
26    image.height(),
27    image.color_space(),
28    image.has_alpha(),
29);
30
31let bitmap = image.decode().unwrap();
32```
33
34If you want to see a more comprehensive example, please take a look
35at the example in [GitHub](https://github.com/LaurenzV/hayro/blob/main/hayro-jpeg2000/examples/png.rs),
36which shows you the main steps needed to convert a JPEG2000 image into PNG for example.
37
38# Testing
The decoder has been tested against 20,000+ images scraped from random PDFs
40on the internet and also passes a large part of the `OpenJPEG` test suite. So you
41can expect the crate to perform decently in terms of decoding correctness.
42
43# Performance
44A decent amount of effort has already been put into optimizing this crate
45(both in terms of raw performance but also memory allocations). However, there
46are some more important optimizations that have not been implemented yet, so
47there is definitely still room for improvement (and I am planning on implementing
48them eventually).
49
50Overall, you should expect this crate to have worse performance than `OpenJPEG`,
but the performance gap should not be too large.
52
53# Safety
54By default, the crate has the `simd` feature enabled, which uses the
55[`fearless_simd`](https://github.com/linebender/fearless_simd) crate to accelerate
56important parts of the pipeline. If you want to eliminate any usage of unsafe
57in this crate as well as its dependencies, you can simply disable this
58feature, at the cost of worse decoding performance. Unsafe code is forbidden
59via a crate-level attribute.
60
61The crate is `no_std` compatible but requires an allocator to be available.
62*/
63
64#![cfg_attr(not(feature = "std"), no_std)]
65#![forbid(unsafe_code)]
66#![forbid(missing_docs)]
67
68extern crate alloc;
69
70use alloc::vec;
71use alloc::vec::Vec;
72
73use crate::error::{bail, err};
74use crate::j2c::{ComponentData, Header};
75use crate::jp2::cdef::{ChannelAssociation, ChannelType};
76use crate::jp2::cmap::ComponentMappingType;
77use crate::jp2::colr::{CieLab, EnumeratedColorspace};
78use crate::jp2::icc::ICCMetadata;
79use crate::jp2::{DecodedImage, ImageBoxes};
80
81pub mod error;
82#[macro_use]
83pub(crate) mod log;
84pub(crate) mod math;
85
86use crate::math::{Level, SIMD_WIDTH, Simd, dispatch, f32x8};
87pub use error::{
88    ColorError, DecodeError, DecodingError, FormatError, MarkerError, Result, TileError,
89    ValidationError,
90};
91pub use j2c::DecoderContext;
92
93#[cfg(feature = "image")]
94pub mod integration;
95mod j2c;
96mod jp2;
97pub(crate) mod reader;
98
/// JP2 signature box: 00 00 00 0C 6A 50 20 20
///
/// Input starting with these bytes is parsed as a JP2 container by
/// [`Image::new`].
pub(crate) const JP2_MAGIC: &[u8] = b"\x00\x00\x00\x0C\x6A\x50\x20\x20";
/// Codestream signature: FF 4F FF 51 (SOC + SIZ markers)
///
/// Input starting with these bytes is parsed as a raw codestream by
/// [`Image::new`].
pub(crate) const CODESTREAM_MAGIC: &[u8] = b"\xFF\x4F\xFF\x51";
103
/// Options that control how an image is decoded.
#[derive(Debug, Copy, Clone)]
pub struct DecodeSettings {
    /// Controls whether palette indices are replaced by the colors they
    /// refer to.
    ///
    /// A JPEG2000 image can either store the color values of every pixel
    /// directly, or it can store only indices which are then resolved to the
    /// actual color by a lookup.
    ///
    /// When this option is turned off, images that use palette indices are
    /// not resolved. A grayscale image is returned instead, in which every
    /// pixel value is the palette index at that location.
    pub resolve_palette_indices: bool,
    /// Controls whether decoding runs in strict mode.
    ///
    /// Leaving this flag disabled is recommended unless you have a specific
    /// reason to enable it.
    pub strict: bool,
    /// Optional hint for the resolution the image should be decoded at.
    pub target_resolution: Option<(u32, u32)>,
}

impl Default for DecodeSettings {
    /// Returns the default settings: palette indices are resolved, strict
    /// mode is off and no target resolution is requested.
    fn default() -> Self {
        DecodeSettings {
            target_resolution: None,
            strict: false,
            resolve_palette_indices: true,
        }
    }
}
137
/// A JPEG2000 image or codestream.
///
/// An image is created with [`Image::new`] and borrows the input data for
/// the lifetime `'a`.
pub struct Image<'a> {
    /// The codestream containing the data to decode.
    pub(crate) codestream: &'a [u8],
    /// The header of the J2C codestream.
    pub(crate) header: Header<'a>,
    /// The JP2 boxes of the image. In the case of a raw codestream, we
    /// will synthesize the necessary boxes.
    pub(crate) boxes: ImageBoxes,
    /// Settings that should be applied during decoding.
    pub(crate) settings: DecodeSettings,
    /// Whether the image has an alpha channel.
    pub(crate) has_alpha: bool,
    /// The color space of the image.
    pub(crate) color_space: ColorSpace,
}
154
155impl<'a> Image<'a> {
156    /// Try to create a new JPEG2000 image from the given data.
157    pub fn new(data: &'a [u8], settings: &DecodeSettings) -> Result<Self> {
158        if data.starts_with(JP2_MAGIC) {
159            jp2::parse(data, *settings)
160        } else if data.starts_with(CODESTREAM_MAGIC) {
161            j2c::parse(data, settings)
162        } else {
163            err!(FormatError::InvalidSignature)
164        }
165    }
166
167    /// Whether the image has an alpha channel.
168    pub fn has_alpha(&self) -> bool {
169        self.has_alpha
170    }
171
172    /// The color space of the image.
173    pub fn color_space(&self) -> &ColorSpace {
174        &self.color_space
175    }
176
177    /// The width of the image.
178    pub fn width(&self) -> u32 {
179        self.header.size_data.image_width()
180    }
181
182    /// The height of the image.
183    pub fn height(&self) -> u32 {
184        self.header.size_data.image_height()
185    }
186
187    /// The original bit depth of the image. You usually don't need to do anything
188    /// with this parameter, it just exists for informational purposes.
189    pub fn original_bit_depth(&self) -> u8 {
190        // Note that this only works if all components have the same precision.
191        self.header.component_infos[0].size_info.precision
192    }
193
194    /// Decode the image and return its decoded result as a `Vec<u8>`, with each
195    /// channel interleaved.
196    pub fn decode(&self) -> Result<Vec<u8>> {
197        let buffer_size = self.width() as usize
198            * self.height() as usize
199            * (self.color_space.num_channels() as usize + if self.has_alpha { 1 } else { 0 });
200        let mut buf = vec![0; buffer_size];
201        let mut decoder_context = DecoderContext::default();
202        self.decode_into(&mut buf, &mut decoder_context)?;
203
204        Ok(buf)
205    }
206
207    /// Decode the image into the given buffer.
208    ///
209    /// This method does the same as [`Image::decode`], but you can provide
210    /// a custom buffer for the output, as well as a decoder context. Doing
211    /// so will allow `hayro-jpeg2000` to reuse memory allocations, so this is
212    /// especially recommended if you plan on converting multiple images
213    /// in the same session.
214    ///
215    /// The buffer must have the correct size.
216    pub fn decode_into(
217        &'a self,
218        buf: &mut [u8],
219        decoder_context: &mut DecoderContext<'a>,
220    ) -> Result<()> {
221        let settings = &self.settings;
222        j2c::decode(self.codestream, &self.header, decoder_context)?;
223        let mut decoded_image = DecodedImage {
224            decoded_components: &mut decoder_context.channel_data,
225            boxes: self.boxes.clone(),
226        };
227
228        // Resolve palette indices.
229        if settings.resolve_palette_indices {
230            let components = core::mem::take(decoded_image.decoded_components);
231            *decoded_image.decoded_components =
232                resolve_palette_indices(components, &decoded_image.boxes)?;
233        }
234
235        if let Some(cdef) = &decoded_image.boxes.channel_definition {
236            // Sort by the channel association. Note that this will only work if
237            // each component is referenced only once.
238            let mut components = decoded_image
239                .decoded_components
240                .iter()
241                .cloned()
242                .zip(
243                    cdef.channel_definitions
244                        .iter()
245                        .map(|c| match c._association {
246                            ChannelAssociation::WholeImage => u16::MAX,
247                            ChannelAssociation::Colour(c) => c,
248                        }),
249                )
250                .collect::<Vec<_>>();
251            components.sort_by(|c1, c2| c1.1.cmp(&c2.1));
252            *decoded_image.decoded_components = components.into_iter().map(|c| c.0).collect();
253        }
254
255        // Note that this is only valid if all images have the same bit depth.
256        let bit_depth = decoded_image.decoded_components[0].bit_depth;
257        convert_color_space(&mut decoded_image, bit_depth)?;
258        interleave_and_convert(&mut decoded_image, buf);
259
260        Ok(())
261    }
262}
263
264pub(crate) fn resolve_alpha_and_color_space(
265    boxes: &ImageBoxes,
266    header: &Header<'_>,
267    settings: &DecodeSettings,
268) -> Result<(ColorSpace, bool)> {
269    let mut num_components = header.component_infos.len();
270
271    // Override number of components with what is actually in the palette box
272    // in case we resolve them.
273    if settings.resolve_palette_indices
274        && let Some(palette_box) = &boxes.palette
275    {
276        num_components = palette_box.columns.len();
277    }
278
279    let mut has_alpha = false;
280
281    if let Some(cdef) = &boxes.channel_definition {
282        let last = cdef.channel_definitions.last().unwrap();
283        has_alpha = last.channel_type == ChannelType::Opacity;
284    }
285
286    let mut color_space = get_color_space(boxes, num_components)?;
287
288    // If we didn't resolve palette indices, we need to assume grayscale image.
289    if !settings.resolve_palette_indices && boxes.palette.is_some() {
290        has_alpha = false;
291        color_space = ColorSpace::Gray;
292    }
293
294    let actual_num_components = header.component_infos.len();
295
296    // Validate the number of channels.
297    if boxes.palette.is_none()
298        && actual_num_components
299            != (color_space.num_channels() + if has_alpha { 1 } else { 0 }) as usize
300    {
301        if !settings.strict
302            && actual_num_components == color_space.num_channels() as usize + 1
303            && !has_alpha
304        {
305            // See OPENJPEG test case orb-blue10-lin-j2k. Assume that we have an
306            // alpha channel in this case.
307            has_alpha = true;
308        } else {
309            // Color space is invalid, attempt to repair.
310            if actual_num_components == 1 || (actual_num_components == 2 && has_alpha) {
311                color_space = ColorSpace::Gray;
312            } else if actual_num_components == 3 {
313                color_space = ColorSpace::RGB;
314            } else if actual_num_components == 4 {
315                if has_alpha {
316                    color_space = ColorSpace::RGB;
317                } else {
318                    color_space = ColorSpace::CMYK;
319                }
320            } else {
321                bail!(ValidationError::TooManyChannels);
322            }
323        }
324    }
325
326    Ok((color_space, has_alpha))
327}
328
/// The color space in which the decoded pixels of an image are expressed.
#[derive(Debug, Clone)]
pub enum ColorSpace {
    /// Grayscale, with a single channel.
    Gray,
    /// RGB, with three channels.
    RGB,
    /// CMYK, with four channels.
    CMYK,
    /// A color space that could not be identified.
    Unknown {
        /// How many channels the color space has.
        num_channels: u8,
    },
    /// A color space that is described by an ICC profile.
    Icc {
        /// The raw bytes of the ICC profile.
        profile: Vec<u8>,
        /// How many channels the ICC profile uses.
        num_channels: u8,
    },
}

impl ColorSpace {
    /// Return how many channels are expected for this color space.
    pub fn num_channels(&self) -> u8 {
        match *self {
            // These variants store their channel count.
            Self::Unknown { num_channels } | Self::Icc { num_channels, .. } => num_channels,
            Self::CMYK => 4,
            Self::RGB => 3,
            Self::Gray => 1,
        }
    }
}
367
/// A bitmap storing the decoded result of the image.
pub struct Bitmap {
    /// The color space of the image.
    pub color_space: ColorSpace,
    /// The raw pixel data of the image. The result will always be in
    /// 8-bit (in case the original image had a different bit-depth,
    /// hayro-jpeg2000 always scales to 8-bit).
    ///
    /// The size is guaranteed to equal
    /// `width * height * (num_channels + (if has_alpha { 1 } else { 0 }))`.
    /// Pixels are interleaved on a per-channel basis, the alpha channel always
    /// appearing as the last channel, if available.
    pub data: Vec<u8>,
    /// Whether the image has an alpha channel.
    pub has_alpha: bool,
    /// The width of the image.
    pub width: u32,
    /// The height of the image.
    pub height: u32,
    /// The original bit depth of the image. You usually don't need to do anything
    /// with this parameter, it just exists for informational purposes.
    pub original_bit_depth: u8,
}
391
392fn interleave_and_convert(image: &mut DecodedImage<'_>, buf: &mut [u8]) {
393    let components = &mut *image.decoded_components;
394    let num_components = components.len();
395
396    let mut all_same_bit_depth = Some(components[0].bit_depth);
397
398    for component in components.iter().skip(1) {
399        if Some(component.bit_depth) != all_same_bit_depth {
400            all_same_bit_depth = None;
401        }
402    }
403
404    let max_len = components[0].container.truncated().len();
405
406    let mut output_iter = buf.iter_mut();
407
408    if all_same_bit_depth == Some(8) && num_components <= 4 {
409        // Fast path for the common case.
410        match num_components {
411            // Gray-scale.
412            1 => {
413                for (output, input) in output_iter.zip(
414                    components[0]
415                        .container
416                        .iter()
417                        .map(|v| math::round_f32(*v) as u8),
418                ) {
419                    *output = input;
420                }
421            }
422            // Gray-scale with alpha.
423            2 => {
424                let c0 = &components[0];
425                let c1 = &components[1];
426
427                let c0 = &c0.container[..max_len];
428                let c1 = &c1.container[..max_len];
429
430                for i in 0..max_len {
431                    *output_iter.next().unwrap() = math::round_f32(c0[i]) as u8;
432                    *output_iter.next().unwrap() = math::round_f32(c1[i]) as u8;
433                }
434            }
435            // RGB
436            3 => {
437                let c0 = &components[0];
438                let c1 = &components[1];
439                let c2 = &components[2];
440
441                let c0 = &c0.container[..max_len];
442                let c1 = &c1.container[..max_len];
443                let c2 = &c2.container[..max_len];
444
445                for i in 0..max_len {
446                    *output_iter.next().unwrap() = math::round_f32(c0[i]) as u8;
447                    *output_iter.next().unwrap() = math::round_f32(c1[i]) as u8;
448                    *output_iter.next().unwrap() = math::round_f32(c2[i]) as u8;
449                }
450            }
451            // RGBA or CMYK.
452            4 => {
453                let c0 = &components[0];
454                let c1 = &components[1];
455                let c2 = &components[2];
456                let c3 = &components[3];
457
458                let c0 = &c0.container[..max_len];
459                let c1 = &c1.container[..max_len];
460                let c2 = &c2.container[..max_len];
461                let c3 = &c3.container[..max_len];
462
463                for i in 0..max_len {
464                    *output_iter.next().unwrap() = math::round_f32(c0[i]) as u8;
465                    *output_iter.next().unwrap() = math::round_f32(c1[i]) as u8;
466                    *output_iter.next().unwrap() = math::round_f32(c2[i]) as u8;
467                    *output_iter.next().unwrap() = math::round_f32(c3[i]) as u8;
468                }
469            }
470            _ => unreachable!(),
471        }
472    } else {
473        // Slow path that also requires us to scale to 8 bit.
474        let mul_factor = ((1 << 8) - 1) as f32;
475
476        for sample in 0..max_len {
477            for channel in components.iter() {
478                *output_iter.next().unwrap() = math::round_f32(
479                    (channel.container[sample] / ((1_u32 << channel.bit_depth) - 1) as f32)
480                        * mul_factor,
481                ) as u8;
482            }
483        }
484    }
485}
486
487fn convert_color_space(image: &mut DecodedImage<'_>, bit_depth: u8) -> Result<()> {
488    if let Some(jp2::colr::ColorSpace::Enumerated(e)) = &image
489        .boxes
490        .color_specification
491        .as_ref()
492        .map(|i| &i.color_space)
493    {
494        match e {
495            EnumeratedColorspace::Sycc => {
496                dispatch!(Level::new(), simd => {
497                    sycc_to_rgb(simd, image.decoded_components, bit_depth)
498                })?;
499            }
500            EnumeratedColorspace::CieLab(cielab) => {
501                dispatch!(Level::new(), simd => {
502                    cielab_to_rgb(simd, image.decoded_components, bit_depth, cielab)
503                })?;
504            }
505            _ => {}
506        }
507    }
508
509    Ok(())
510}
511
512fn get_color_space(boxes: &ImageBoxes, num_components: usize) -> Result<ColorSpace> {
513    let cs = match boxes
514        .color_specification
515        .as_ref()
516        .map(|c| &c.color_space)
517        .unwrap_or(&jp2::colr::ColorSpace::Unknown)
518    {
519        jp2::colr::ColorSpace::Enumerated(e) => {
520            match e {
521                EnumeratedColorspace::Cmyk => ColorSpace::CMYK,
522                EnumeratedColorspace::Srgb => ColorSpace::RGB,
523                EnumeratedColorspace::RommRgb => {
524                    // Use an ICC profile to process the RommRGB color space.
525                    ColorSpace::Icc {
526                        profile: include_bytes!("../assets/ProPhoto-v2-micro.icc").to_vec(),
527                        num_channels: 3,
528                    }
529                }
530                EnumeratedColorspace::EsRgb => ColorSpace::RGB,
531                EnumeratedColorspace::Greyscale => ColorSpace::Gray,
532                EnumeratedColorspace::Sycc => ColorSpace::RGB,
533                EnumeratedColorspace::CieLab(_) => ColorSpace::Icc {
534                    profile: include_bytes!("../assets/LAB.icc").to_vec(),
535                    num_channels: 3,
536                },
537                _ => bail!(FormatError::Unsupported),
538            }
539        }
540        jp2::colr::ColorSpace::Icc(icc) => {
541            if let Some(metadata) = ICCMetadata::from_data(icc) {
542                ColorSpace::Icc {
543                    profile: icc.clone(),
544                    num_channels: metadata.color_space.num_components(),
545                }
546            } else {
547                // See OPENJPEG test orb-blue10-lin-jp2.jp2. They seem to
548                // assume RGB in this case (even though the image has 4
549                // components with no opacity channel, they assume RGBA instead
550                // of CMYK).
551                ColorSpace::RGB
552            }
553        }
554        jp2::colr::ColorSpace::Unknown => match num_components {
555            1 => ColorSpace::Gray,
556            3 => ColorSpace::RGB,
557            4 => ColorSpace::CMYK,
558            _ => ColorSpace::Unknown {
559                num_channels: num_components as u8,
560            },
561        },
562    };
563
564    Ok(cs)
565}
566
567fn resolve_palette_indices(
568    components: Vec<ComponentData>,
569    boxes: &ImageBoxes,
570) -> Result<Vec<ComponentData>> {
571    let Some(palette) = boxes.palette.as_ref() else {
572        // Nothing to resolve.
573        return Ok(components);
574    };
575
576    let mapping = boxes.component_mapping.as_ref().unwrap();
577    let mut resolved = Vec::with_capacity(mapping.entries.len());
578
579    for entry in &mapping.entries {
580        let component_idx = entry.component_index as usize;
581        let component = components
582            .get(component_idx)
583            .ok_or(ColorError::PaletteResolutionFailed)?;
584
585        match entry.mapping_type {
586            ComponentMappingType::Direct => resolved.push(component.clone()),
587            ComponentMappingType::Palette { column } => {
588                let column_idx = column as usize;
589                let column_info = palette
590                    .columns
591                    .get(column_idx)
592                    .ok_or(ColorError::PaletteResolutionFailed)?;
593
594                let mut mapped =
595                    Vec::with_capacity(component.container.truncated().len() + SIMD_WIDTH);
596
597                for &sample in component.container.truncated() {
598                    let index = math::round_f32(sample) as i64;
599                    let value = palette
600                        .map(index as usize, column_idx)
601                        .ok_or(ColorError::PaletteResolutionFailed)?;
602                    mapped.push(value as f32);
603                }
604
605                resolved.push(ComponentData {
606                    container: math::SimdBuffer::new(mapped),
607                    bit_depth: column_info.bit_depth,
608                });
609            }
610        }
611    }
612
613    Ok(resolved)
614}
615
616#[inline(always)]
617fn cielab_to_rgb<S: Simd>(
618    simd: S,
619    components: &mut [ComponentData],
620    bit_depth: u8,
621    lab: &CieLab,
622) -> Result<()> {
623    let (head, _) = components
624        .split_at_mut_checked(3)
625        .ok_or(ColorError::LabConversionFailed)?;
626
627    let [l, a, b] = head else {
628        unreachable!();
629    };
630
631    let prec0 = l.bit_depth;
632    let prec1 = a.bit_depth;
633    let prec2 = b.bit_depth;
634
635    // Prevent underflows/divisions by zero further below.
636    if prec0 < 4 || prec1 < 4 || prec2 < 4 {
637        bail!(ColorError::LabConversionFailed);
638    }
639
640    let rl = lab.rl.unwrap_or(100);
641    let ra = lab.ra.unwrap_or(170);
642    let rb = lab.ra.unwrap_or(200);
643    let ol = lab.ol.unwrap_or(0);
644    let oa = lab.oa.unwrap_or(1 << (bit_depth - 1));
645    let ob = lab
646        .ob
647        .unwrap_or((1 << (bit_depth - 2)) + (1 << (bit_depth - 3)));
648
649    // Copied from OpenJPEG.
650    let min_l = -(rl as f32 * ol as f32) / ((1 << prec0) - 1) as f32;
651    let max_l = min_l + rl as f32;
652    let min_a = -(ra as f32 * oa as f32) / ((1 << prec1) - 1) as f32;
653    let max_a = min_a + ra as f32;
654    let min_b = -(rb as f32 * ob as f32) / ((1 << prec2) - 1) as f32;
655    let max_b = min_b + rb as f32;
656
657    let bit_max = (1_u32 << bit_depth) - 1;
658
659    // Note that we are not doing the actual conversion with the ICC profile yet,
660    // just decoding the raw LAB values.
661    // We leave applying the ICC profile to the user.
662    let divisor_l = ((1 << prec0) - 1) as f32;
663    let divisor_a = ((1 << prec1) - 1) as f32;
664    let divisor_b = ((1 << prec2) - 1) as f32;
665
666    let scale_l_final = bit_max as f32 / 100.0;
667    let scale_ab_final = bit_max as f32 / 255.0;
668
669    let l_offset = min_l * scale_l_final;
670    let l_scale = (max_l - min_l) / divisor_l * scale_l_final;
671    let a_offset = (min_a + 128.0) * scale_ab_final;
672    let a_scale = (max_a - min_a) / divisor_a * scale_ab_final;
673    let b_offset = (min_b + 128.0) * scale_ab_final;
674    let b_scale = (max_b - min_b) / divisor_b * scale_ab_final;
675
676    let l_offset_v = f32x8::splat(simd, l_offset);
677    let l_scale_v = f32x8::splat(simd, l_scale);
678    let a_offset_v = f32x8::splat(simd, a_offset);
679    let a_scale_v = f32x8::splat(simd, a_scale);
680    let b_offset_v = f32x8::splat(simd, b_offset);
681    let b_scale_v = f32x8::splat(simd, b_scale);
682
683    // Note that we are not doing the actual conversion with the ICC profile yet,
684    // just decoding the raw LAB values.
685    // We leave applying the ICC profile to the user.
686    for ((l_chunk, a_chunk), b_chunk) in l
687        .container
688        .chunks_exact_mut(SIMD_WIDTH)
689        .zip(a.container.chunks_exact_mut(SIMD_WIDTH))
690        .zip(b.container.chunks_exact_mut(SIMD_WIDTH))
691    {
692        let l_v = f32x8::from_slice(simd, l_chunk);
693        let a_v = f32x8::from_slice(simd, a_chunk);
694        let b_v = f32x8::from_slice(simd, b_chunk);
695
696        l_v.mul_add(l_scale_v, l_offset_v).store(l_chunk);
697        a_v.mul_add(a_scale_v, a_offset_v).store(a_chunk);
698        b_v.mul_add(b_scale_v, b_offset_v).store(b_chunk);
699    }
700
701    Ok(())
702}
703
704#[inline(always)]
705fn sycc_to_rgb<S: Simd>(simd: S, components: &mut [ComponentData], bit_depth: u8) -> Result<()> {
706    let offset = (1_u32 << (bit_depth as u32 - 1)) as f32;
707    let max_value = ((1_u32 << bit_depth as u32) - 1) as f32;
708
709    let (head, _) = components
710        .split_at_mut_checked(3)
711        .ok_or(ColorError::SyccConversionFailed)?;
712
713    let [y, cb, cr] = head else {
714        unreachable!();
715    };
716
717    let offset_v = f32x8::splat(simd, offset);
718    let max_v = f32x8::splat(simd, max_value);
719    let zero_v = f32x8::splat(simd, 0.0);
720    let cr_to_r = f32x8::splat(simd, 1.402);
721    let cb_to_g = f32x8::splat(simd, -0.344136);
722    let cr_to_g = f32x8::splat(simd, -0.714136);
723    let cb_to_b = f32x8::splat(simd, 1.772);
724
725    for ((y_chunk, cb_chunk), cr_chunk) in y
726        .container
727        .chunks_exact_mut(SIMD_WIDTH)
728        .zip(cb.container.chunks_exact_mut(SIMD_WIDTH))
729        .zip(cr.container.chunks_exact_mut(SIMD_WIDTH))
730    {
731        let y_v = f32x8::from_slice(simd, y_chunk);
732        let cb_v = f32x8::from_slice(simd, cb_chunk) - offset_v;
733        let cr_v = f32x8::from_slice(simd, cr_chunk) - offset_v;
734
735        // r = y + 1.402 * cr
736        let r = cr_v.mul_add(cr_to_r, y_v);
737        // g = y - 0.344136 * cb - 0.714136 * cr
738        let g = cr_v.mul_add(cr_to_g, cb_v.mul_add(cb_to_g, y_v));
739        // b = y + 1.772 * cb
740        let b = cb_v.mul_add(cb_to_b, y_v);
741
742        r.min(max_v).max(zero_v).store(y_chunk);
743        g.min(max_v).max(zero_v).store(cb_chunk);
744        b.min(max_v).max(zero_v).store(cr_chunk);
745    }
746
747    Ok(())
748}