linear_srgb/
simd.rs

1//! SIMD-accelerated sRGB ↔ linear conversion.
2//!
3//! This module provides high-performance conversion functions using AVX2/SSE SIMD
4//! instructions via the `wide` crate with runtime CPU feature detection.
5//!
6//! # API Overview
7//!
8//! ## x8 Functions (process 8 values at once)
9//! - [`srgb_to_linear_x8`] - f32x8 sRGB → f32x8 linear
10//! - [`linear_to_srgb_x8`] - f32x8 linear → f32x8 sRGB
11//! - [`srgb_u8_to_linear_x8`] - \[u8; 8\] sRGB → f32x8 linear
12//! - [`linear_to_srgb_u8_x8`] - f32x8 linear → \[u8; 8\] sRGB
13//!
14//! ## Slice Functions (process entire slices)
15//! - [`srgb_to_linear_slice`] - &mut \[f32\] sRGB → linear in-place
16//! - [`linear_to_srgb_slice`] - &mut \[f32\] linear → sRGB in-place
17//! - [`srgb_u8_to_linear_slice`] - &\[u8\] sRGB → &mut \[f32\] linear
18//! - [`linear_to_srgb_u8_slice`] - &\[f32\] linear → &mut \[u8\] sRGB
19
20use multiversed::multiversed;
21use wide::{CmpLt, f32x8};
22
23use crate::fast_math::pow_x8;
24
25// sRGB transfer function constants (IEC 61966-2-1)
26const SRGB_LINEAR_THRESHOLD: f32x8 = f32x8::splat(0.039_293_37);
27const LINEAR_THRESHOLD: f32x8 = f32x8::splat(0.003_041_282_6);
28const LINEAR_SCALE: f32x8 = f32x8::splat(1.0 / 12.92);
29const SRGB_OFFSET: f32x8 = f32x8::splat(0.055);
30const SRGB_SCALE: f32x8 = f32x8::splat(1.055);
31const TWELVE_92: f32x8 = f32x8::splat(12.92);
32const ZERO: f32x8 = f32x8::splat(0.0);
33const ONE: f32x8 = f32x8::splat(1.0);
34const U8_MAX: f32x8 = f32x8::splat(255.0);
35const HALF: f32x8 = f32x8::splat(0.5);
36
/// Lookup table mapping every sRGB u8 code (the array index) to its linear f32 value.
///
/// Built from the same constants as the transfer module (C0-continuous
/// IEC 61966-2-1) by evaluating `srgb_u8_to_linear(i)` for each i in 0..=255.
/// To regenerate: `cargo run --release --example generate_lut`
///
/// Laid out eight entries per row; the comment on each row gives the index
/// range it covers.
#[rustfmt::skip]
const SRGB_U8_TO_LINEAR_LUT: [f32; 256] = [
    // 0..=7
    0.0, 0.000303527, 0.000607054, 0.000910581, 0.001214108, 0.001517635, 0.001821162, 0.0021246888,
    // 8..=15
    0.002428216, 0.002731743, 0.00303527, 0.0033473307, 0.0036773437, 0.0040255957, 0.004392362, 0.004777916,
    // 16..=23
    0.0051825214, 0.00560644, 0.006049924, 0.0065132244, 0.0069965874, 0.007500253, 0.008024457, 0.008569433,
    // 24..=31
    0.009135411, 0.009722613, 0.010331264, 0.010961577, 0.011613773, 0.012288062, 0.012984648, 0.013703744,
    // 32..=39
    0.01444555, 0.015210266, 0.01599809, 0.016809216, 0.01764384, 0.018502146, 0.019384334, 0.02029058,
    // 40..=47
    0.02122107, 0.022175988, 0.023155512, 0.024159823, 0.025189094, 0.026243499, 0.027323212, 0.0284284,
    // 48..=55
    0.02955924, 0.030715894, 0.03189852, 0.0331073, 0.034342386, 0.03560393, 0.036892105, 0.03820707,
    // 56..=63
    0.039548974, 0.04091798, 0.04231424, 0.04373789, 0.045189105, 0.04666803, 0.04817481, 0.049709592,
    // 64..=71
    0.051272515, 0.052863743, 0.054483414, 0.05613167, 0.05780865, 0.05951448, 0.061249338, 0.063013345,
    // 72..=79
    0.06480663, 0.06662934, 0.068481594, 0.07036356, 0.072275355, 0.07421711, 0.07618896, 0.07819102,
    // 80..=87
    0.080223456, 0.08228638, 0.08437992, 0.086504206, 0.088659346, 0.09084551, 0.093062796, 0.09531133,
    // 88..=95
    0.09759124, 0.09990266, 0.10224568, 0.104620464, 0.10702711, 0.109465756, 0.1119365, 0.11443946,
    // 96..=103
    0.116974786, 0.11954258, 0.12214295, 0.12477602, 0.1274419, 0.13014072, 0.1328726, 0.13563763,
    // 104..=111
    0.13843594, 0.14126763, 0.14413282, 0.14703165, 0.1499642, 0.15293059, 0.15593089, 0.15896529,
    // 112..=119
    0.16203386, 0.1651367, 0.16827393, 0.17144562, 0.17465195, 0.17789298, 0.18116882, 0.1844796,
    // 120..=127
    0.18782537, 0.1912063, 0.19462249, 0.19807397, 0.2015609, 0.20508343, 0.20864154, 0.21223548,
    // 128..=135
    0.21586527, 0.21953095, 0.22323275, 0.22697066, 0.23074481, 0.2345554, 0.23840237, 0.24228595,
    // 136..=143
    0.24620613, 0.25016314, 0.25415692, 0.25818765, 0.26225552, 0.26636043, 0.27050266, 0.27468216,
    // 144..=151
    0.27889907, 0.2831536, 0.28744566, 0.29177552, 0.2961431, 0.30054858, 0.30499217, 0.30947372,
    // 152..=159
    0.31399357, 0.3185516, 0.32314798, 0.3277829, 0.33245632, 0.33716843, 0.34191918, 0.34670877,
    // 160..=167
    0.35153738, 0.35640487, 0.36131153, 0.3662573, 0.37124234, 0.37626684, 0.38133067, 0.3864341,
    // 168..=175
    0.39157712, 0.3967598, 0.4019824, 0.40724477, 0.4125472, 0.41788962, 0.42327216, 0.42869502,
    // 176..=183
    0.4341581, 0.43966165, 0.44520563, 0.45079017, 0.4564154, 0.46208134, 0.46778816, 0.4735358,
    // 184..=191
    0.47932443, 0.4851542, 0.49102503, 0.49693722, 0.5028906, 0.5088854, 0.5149218, 0.5209996,
    // 192..=199
    0.52711916, 0.5332804, 0.53948337, 0.5457284, 0.55201524, 0.55834424, 0.56471527, 0.57112855,
    // 200..=207
    0.57758415, 0.58408207, 0.5906225, 0.59720534, 0.6038308, 0.6104991, 0.61721, 0.62396383,
    // 208..=215
    0.6307605, 0.6376001, 0.644483, 0.6514088, 0.658378, 0.6653904, 0.67244613, 0.67954546,
    // 216..=223
    0.68668824, 0.6938747, 0.7011047, 0.7083785, 0.7156962, 0.72305775, 0.7304634, 0.73791295,
    // 224..=231
    0.7454066, 0.75294465, 0.76052684, 0.7681535, 0.7758244, 0.7835399, 0.79130006, 0.79910475,
    // 232..=239
    0.80695426, 0.8148484, 0.82278764, 0.8307716, 0.83880067, 0.8468749, 0.8549941, 0.8631587,
    // 240..=247
    0.8713685, 0.87962353, 0.8879244, 0.89627033, 0.9046623, 0.9130995, 0.9215827, 0.9301116,
    // 248..=255
    0.93868643, 0.9473071, 0.9559739, 0.9646866, 0.9734457, 0.9822507, 0.9911024, 1.0,
];
299
300#[inline]
301fn get_lut() -> &'static [f32; 256] {
302    &SRGB_U8_TO_LINEAR_LUT
303}
304
305// ============================================================================
306// x8 Functions - Process 8 values at once
307// ============================================================================
308
309/// Convert 8 sRGB f32 values to linear.
310///
311/// Input values are clamped to \[0, 1\].
312///
313/// # Example
314/// ```
315/// use linear_srgb::simd::srgb_to_linear_x8;
316/// use wide::f32x8;
317///
318/// let srgb = f32x8::from([0.0, 0.25, 0.5, 0.75, 1.0, 0.1, 0.9, 0.5]);
319/// let linear = srgb_to_linear_x8(srgb);
320/// ```
321#[multiversed]
322#[inline]
323pub fn srgb_to_linear_x8(srgb: f32x8) -> f32x8 {
324    let srgb = srgb.max(ZERO).min(ONE);
325    let linear_result = srgb * LINEAR_SCALE;
326    let power_result = pow_x8((srgb + SRGB_OFFSET) / SRGB_SCALE, 2.4);
327    let mask = srgb.simd_lt(SRGB_LINEAR_THRESHOLD);
328    mask.blend(linear_result, power_result)
329}
330
331/// Convert 8 linear f32 values to sRGB.
332///
333/// Input values are clamped to \[0, 1\].
334///
335/// # Example
336/// ```
337/// use linear_srgb::simd::linear_to_srgb_x8;
338/// use wide::f32x8;
339///
340/// let linear = f32x8::from([0.0, 0.1, 0.2, 0.5, 1.0, 0.01, 0.05, 0.8]);
341/// let srgb = linear_to_srgb_x8(linear);
342/// ```
343#[multiversed]
344#[inline]
345pub fn linear_to_srgb_x8(linear: f32x8) -> f32x8 {
346    let linear = linear.max(ZERO).min(ONE);
347    let linear_result = linear * TWELVE_92;
348    let power_result = SRGB_SCALE * pow_x8(linear, 1.0 / 2.4) - SRGB_OFFSET;
349    let mask = linear.simd_lt(LINEAR_THRESHOLD);
350    mask.blend(linear_result, power_result)
351}
352
353/// Convert 8 sRGB u8 values to linear f32 using LUT lookup.
354///
355/// This is the fastest method for u8 input as it uses a precomputed lookup table.
356///
357/// # Example
358/// ```
359/// use linear_srgb::simd::srgb_u8_to_linear_x8;
360///
361/// let srgb = [0u8, 64, 128, 192, 255, 32, 96, 160];
362/// let linear = srgb_u8_to_linear_x8(srgb);
363/// ```
364#[inline]
365pub fn srgb_u8_to_linear_x8(srgb: [u8; 8]) -> f32x8 {
366    let lut = get_lut();
367    f32x8::from([
368        lut[srgb[0] as usize],
369        lut[srgb[1] as usize],
370        lut[srgb[2] as usize],
371        lut[srgb[3] as usize],
372        lut[srgb[4] as usize],
373        lut[srgb[5] as usize],
374        lut[srgb[6] as usize],
375        lut[srgb[7] as usize],
376    ])
377}
378
379/// Convert 8 linear f32 values to sRGB u8.
380///
381/// Input values are clamped to \[0, 1\], output is rounded to nearest u8.
382///
383/// # Example
384/// ```
385/// use linear_srgb::simd::linear_to_srgb_u8_x8;
386/// use wide::f32x8;
387///
388/// let linear = f32x8::from([0.0, 0.1, 0.2, 0.5, 1.0, 0.01, 0.05, 0.8]);
389/// let srgb = linear_to_srgb_u8_x8(linear);
390/// ```
391#[multiversed]
392#[inline]
393pub fn linear_to_srgb_u8_x8(linear: f32x8) -> [u8; 8] {
394    let srgb = linear_to_srgb_x8(linear);
395    let scaled = srgb * U8_MAX + HALF;
396    let arr: [f32; 8] = scaled.into();
397    [
398        arr[0] as u8,
399        arr[1] as u8,
400        arr[2] as u8,
401        arr[3] as u8,
402        arr[4] as u8,
403        arr[5] as u8,
404        arr[6] as u8,
405        arr[7] as u8,
406    ]
407}
408
409// ============================================================================
410// Slice Functions - Process entire slices
411// ============================================================================
412
413/// Convert sRGB f32 values to linear in-place.
414///
415/// Processes 8 values at a time using SIMD, with scalar fallback for remainder.
416///
417/// # Example
418/// ```
419/// use linear_srgb::simd::srgb_to_linear_slice;
420///
421/// let mut values = vec![0.0f32, 0.25, 0.5, 0.75, 1.0];
422/// srgb_to_linear_slice(&mut values);
423/// ```
424#[multiversed]
425#[inline]
426pub fn srgb_to_linear_slice(values: &mut [f32]) {
427    let (chunks, remainder) = values.as_chunks_mut::<8>();
428
429    for chunk in chunks {
430        let result = srgb_to_linear_x8(f32x8::from(*chunk));
431        *chunk = result.into();
432    }
433
434    for v in remainder {
435        *v = crate::srgb_to_linear(*v);
436    }
437}
438
439/// Convert linear f32 values to sRGB in-place.
440///
441/// Processes 8 values at a time using SIMD, with scalar fallback for remainder.
442///
443/// # Example
444/// ```
445/// use linear_srgb::simd::linear_to_srgb_slice;
446///
447/// let mut values = vec![0.0f32, 0.1, 0.2, 0.5, 1.0];
448/// linear_to_srgb_slice(&mut values);
449/// ```
450#[multiversed]
451#[inline]
452pub fn linear_to_srgb_slice(values: &mut [f32]) {
453    let (chunks, remainder) = values.as_chunks_mut::<8>();
454
455    for chunk in chunks {
456        let result = linear_to_srgb_x8(f32x8::from(*chunk));
457        *chunk = result.into();
458    }
459
460    for v in remainder {
461        *v = crate::linear_to_srgb(*v);
462    }
463}
464
465/// Convert sRGB u8 values to linear f32.
466///
467/// Uses a precomputed LUT for each u8 value, processed in SIMD batches of 8.
468///
469/// # Panics
470/// Panics if `input.len() != output.len()`.
471///
472/// # Example
473/// ```
474/// use linear_srgb::simd::srgb_u8_to_linear_slice;
475///
476/// let input: Vec<u8> = (0..=255).collect();
477/// let mut output = vec![0.0f32; 256];
478/// srgb_u8_to_linear_slice(&input, &mut output);
479/// ```
480#[inline]
481pub fn srgb_u8_to_linear_slice(input: &[u8], output: &mut [f32]) {
482    assert_eq!(input.len(), output.len());
483    let lut = get_lut();
484
485    let (in_chunks, in_remainder) = input.as_chunks::<8>();
486    let (out_chunks, out_remainder) = output.as_chunks_mut::<8>();
487
488    for (inp, out) in in_chunks.iter().zip(out_chunks.iter_mut()) {
489        *out = [
490            lut[inp[0] as usize],
491            lut[inp[1] as usize],
492            lut[inp[2] as usize],
493            lut[inp[3] as usize],
494            lut[inp[4] as usize],
495            lut[inp[5] as usize],
496            lut[inp[6] as usize],
497            lut[inp[7] as usize],
498        ];
499    }
500
501    for (inp, out) in in_remainder.iter().zip(out_remainder.iter_mut()) {
502        *out = lut[*inp as usize];
503    }
504}
505
506/// Convert linear f32 values to sRGB u8.
507///
508/// Processes 8 values at a time using SIMD, with scalar fallback for remainder.
509///
510/// # Panics
511/// Panics if `input.len() != output.len()`.
512///
513/// # Example
514/// ```
515/// use linear_srgb::simd::linear_to_srgb_u8_slice;
516///
517/// let input: Vec<f32> = (0..=255).map(|i| i as f32 / 255.0).collect();
518/// let mut output = vec![0u8; 256];
519/// linear_to_srgb_u8_slice(&input, &mut output);
520/// ```
521#[multiversed]
522#[inline]
523pub fn linear_to_srgb_u8_slice(input: &[f32], output: &mut [u8]) {
524    assert_eq!(input.len(), output.len());
525
526    let (in_chunks, in_remainder) = input.as_chunks::<8>();
527    let (out_chunks, out_remainder) = output.as_chunks_mut::<8>();
528
529    for (inp, out) in in_chunks.iter().zip(out_chunks.iter_mut()) {
530        *out = linear_to_srgb_u8_x8(f32x8::from(*inp));
531    }
532
533    for (inp, out) in in_remainder.iter().zip(out_remainder.iter_mut()) {
534        let srgb = crate::linear_to_srgb(*inp);
535        *out = (srgb * 255.0 + 0.5) as u8;
536    }
537}
538
539// ============================================================================
540// Tests
541// ============================================================================
542
#[cfg(test)]
mod tests {
    //! Unit tests comparing the SIMD paths against the crate's scalar
    //! reference functions, plus round-trip and edge-case checks.

    use super::*;

    #[cfg(not(feature = "std"))]
    use alloc::{vec, vec::Vec};

    // ---- x8 function tests ----

    /// Each lane of the vector decoder must match the scalar decoder.
    #[test]
    fn test_srgb_to_linear_x8() {
        // 0.04 lies just above the linear/power cutoff, 0.0 below it.
        let input = [0.0f32, 0.25, 0.5, 0.75, 1.0, 0.1, 0.9, 0.04];
        let result_arr: [f32; 8] = srgb_to_linear_x8(f32x8::from(input)).into();

        for (i, (&inp, &got)) in input.iter().zip(result_arr.iter()).enumerate() {
            let expected = crate::srgb_to_linear(inp);
            assert!(
                (got - expected).abs() < 1e-5,
                "srgb_to_linear_x8 mismatch at {}: got {}, expected {}",
                i,
                got,
                expected
            );
        }
    }

    /// Each lane of the vector encoder must match the scalar encoder.
    #[test]
    fn test_linear_to_srgb_x8() {
        let input = [0.0f32, 0.1, 0.2, 0.5, 1.0, 0.01, 0.001, 0.8];
        let result_arr: [f32; 8] = linear_to_srgb_x8(f32x8::from(input)).into();

        for (i, (&inp, &got)) in input.iter().zip(result_arr.iter()).enumerate() {
            let expected = crate::linear_to_srgb(inp);
            assert!(
                (got - expected).abs() < 1e-5,
                "linear_to_srgb_x8 mismatch at {}: got {}, expected {}",
                i,
                got,
                expected
            );
        }
    }

    /// The LUT-backed x8 decoder must match the scalar u8 decoder.
    #[test]
    fn test_srgb_u8_to_linear_x8() {
        let input: [u8; 8] = [0, 64, 128, 192, 255, 32, 96, 160];
        let result_arr: [f32; 8] = srgb_u8_to_linear_x8(input).into();

        for (i, (&inp, &got)) in input.iter().zip(result_arr.iter()).enumerate() {
            let expected = crate::srgb_u8_to_linear(inp);
            assert!(
                (got - expected).abs() < 1e-6,
                "srgb_u8_to_linear_x8 mismatch at {}: got {}, expected {}",
                i,
                got,
                expected
            );
        }
    }

    /// The x8 u8 encoder may differ from the scalar path by at most one code.
    #[test]
    fn test_linear_to_srgb_u8_x8() {
        let input = [0.0f32, 0.1, 0.2, 0.5, 1.0, 0.01, 0.05, 0.8];
        let result = linear_to_srgb_u8_x8(f32x8::from(input));

        for (i, (&inp, &got)) in input.iter().zip(result.iter()).enumerate() {
            let expected = (crate::linear_to_srgb(inp) * 255.0 + 0.5) as u8;
            assert!(
                (got as i16 - expected as i16).abs() <= 1,
                "linear_to_srgb_u8_x8 mismatch at {}: got {}, expected {}",
                i,
                got,
                expected
            );
        }
    }

    // ---- Slice function tests ----

    /// 100 elements = 12 full blocks + a 4-element scalar tail.
    #[test]
    fn test_srgb_to_linear_slice() {
        let mut values: Vec<f32> = (0..100).map(|i| i as f32 / 99.0).collect();
        let expected: Vec<f32> = values.iter().map(|&v| crate::srgb_to_linear(v)).collect();

        srgb_to_linear_slice(&mut values);

        for (i, (&got, &exp)) in values.iter().zip(expected.iter()).enumerate() {
            assert!(
                (got - exp).abs() < 1e-5,
                "srgb_to_linear_slice mismatch at {}: got {}, expected {}",
                i,
                got,
                exp
            );
        }
    }

    /// 100 elements = 12 full blocks + a 4-element scalar tail.
    #[test]
    fn test_linear_to_srgb_slice() {
        let mut values: Vec<f32> = (0..100).map(|i| i as f32 / 99.0).collect();
        let expected: Vec<f32> = values.iter().map(|&v| crate::linear_to_srgb(v)).collect();

        linear_to_srgb_slice(&mut values);

        for (i, (&got, &exp)) in values.iter().zip(expected.iter()).enumerate() {
            assert!(
                (got - exp).abs() < 1e-5,
                "linear_to_srgb_slice mismatch at {}: got {}, expected {}",
                i,
                got,
                exp
            );
        }
    }

    /// Every possible u8 code must decode to the scalar reference value.
    #[test]
    fn test_srgb_u8_to_linear_slice() {
        let input: Vec<u8> = (0..=255).collect();
        let mut output = vec![0.0f32; 256];

        srgb_u8_to_linear_slice(&input, &mut output);

        for (i, (&code, &out)) in input.iter().zip(output.iter()).enumerate() {
            let expected = crate::srgb_u8_to_linear(code);
            assert!(
                (out - expected).abs() < 1e-6,
                "srgb_u8_to_linear_slice mismatch at {}: got {}, expected {}",
                i,
                out,
                expected
            );
        }
    }

    /// The slice u8 encoder may differ from the scalar path by at most one code.
    #[test]
    fn test_linear_to_srgb_u8_slice() {
        let input: Vec<f32> = (0..=255).map(|i| i as f32 / 255.0).collect();
        let mut output = vec![0u8; 256];

        linear_to_srgb_u8_slice(&input, &mut output);

        for (i, (&lin, &got)) in input.iter().zip(output.iter()).enumerate() {
            let expected = (crate::linear_to_srgb(lin) * 255.0 + 0.5) as u8;
            assert!(
                (got as i16 - expected as i16).abs() <= 1,
                "linear_to_srgb_u8_slice mismatch at {}: got {}, expected {}",
                i,
                got,
                expected
            );
        }
    }

    // ---- Roundtrip tests ----

    /// sRGB → linear → sRGB must return (approximately) the starting value.
    #[test]
    fn test_f32_roundtrip() {
        let mut values: Vec<f32> = (0..1000).map(|i| i as f32 / 999.0).collect();
        let original = values.clone();

        srgb_to_linear_slice(&mut values);
        linear_to_srgb_slice(&mut values);

        for (i, (&orig, &conv)) in original.iter().zip(values.iter()).enumerate() {
            assert!(
                (orig - conv).abs() < 1e-4,
                "f32 roundtrip failed at {}: {} -> {}",
                i,
                orig,
                conv
            );
        }
    }

    /// u8 → linear → u8 must land within one code of the starting value.
    #[test]
    fn test_u8_roundtrip() {
        let input: Vec<u8> = (0..=255).collect();
        let mut linear = vec![0.0f32; 256];
        let mut back = vec![0u8; 256];

        srgb_u8_to_linear_slice(&input, &mut linear);
        linear_to_srgb_u8_slice(&linear, &mut back);

        let triples = input.iter().zip(linear.iter()).zip(back.iter());
        for (i, ((&orig, &lin), &round_tripped)) in triples.enumerate() {
            assert!(
                (orig as i16 - round_tripped as i16).abs() <= 1,
                "u8 roundtrip failed at {}: {} -> {} -> {}",
                i,
                orig,
                lin,
                round_tripped
            );
        }
    }

    // ---- Edge case tests ----

    /// Out-of-range lanes must be clamped before conversion.
    #[test]
    fn test_clamping() {
        let input = f32x8::from([-0.5, -0.1, 0.0, 0.5, 1.0, 1.5, 2.0, 10.0]);
        let result = srgb_to_linear_x8(input);
        let arr: [f32; 8] = result.into();

        assert_eq!(arr[0], 0.0, "negative should clamp to 0");
        assert_eq!(arr[1], 0.0, "negative should clamp to 0");
        assert!(arr[4] > 0.99 && arr[4] <= 1.0, "1.0 should stay ~1.0");
        // Every lane above 1.0 clamps to 1.0, so all of them must behave like lane 4.
        assert!(arr[5] > 0.99 && arr[5] <= 1.0, "values > 1 should clamp");
        assert!(arr[6] > 0.99 && arr[6] <= 1.0, "values > 1 should clamp");
        assert!(arr[7] > 0.99 && arr[7] <= 1.0, "values > 1 should clamp");
    }

    /// Lanes below the cutoff must equal a plain division by 12.92.
    #[test]
    fn test_linear_segment() {
        // All inputs sit below SRGB_LINEAR_THRESHOLD (~0.03929), so every lane
        // takes the linear branch. 0.04 would already fall in the power segment.
        let input = f32x8::from([0.0, 0.01, 0.02, 0.03, 0.039, 0.005, 0.015, 0.035]);
        let result = srgb_to_linear_x8(input);
        let arr: [f32; 8] = result.into();
        let input_arr: [f32; 8] = input.into();

        for (i, (&inp, &got)) in input_arr.iter().zip(arr.iter()).enumerate() {
            let expected = inp / 12.92;
            assert!(
                (got - expected).abs() < 1e-6,
                "linear segment mismatch at {}: got {}, expected {}",
                i,
                got,
                expected
            );
        }
    }

    /// Verify the const LUT stays in sync with the transfer function.
    /// Allows 1 ULP difference for cross-platform float variance (powf isn't
    /// perfectly deterministic across architectures).
    #[test]
    fn test_lut_matches_transfer_function() {
        let lut = get_lut();
        for i in 0..=255u8 {
            let expected = crate::srgb_u8_to_linear(i);
            let got = lut[i as usize];
            let got_bits = got.to_bits();
            let expected_bits = expected.to_bits();
            // Table values are non-negative, so the distance between bit
            // patterns equals the distance in ULPs.
            let ulp_diff = (got_bits as i64 - expected_bits as i64).unsigned_abs();
            assert!(
                ulp_diff <= 1,
                "LUT[{}] = {} ({:08x}) differs by {} ULP from srgb_u8_to_linear({}) = {} ({:08x}). \
                 LUT needs regeneration if transfer constants changed.",
                i,
                got,
                got_bits,
                ulp_diff,
                i,
                expected,
                expected_bits
            );
        }
    }

    /// All four slice entry points must accept empty input without panicking.
    #[test]
    fn test_empty_slice() {
        let mut empty: Vec<f32> = vec![];
        srgb_to_linear_slice(&mut empty);
        assert!(empty.is_empty());
        linear_to_srgb_slice(&mut empty);
        assert!(empty.is_empty());

        let empty_u8: Vec<u8> = vec![];
        let mut empty_out: Vec<f32> = vec![];
        srgb_u8_to_linear_slice(&empty_u8, &mut empty_out);
        assert!(empty_out.is_empty());

        let empty_linear: Vec<f32> = vec![];
        let mut empty_codes: Vec<u8> = vec![];
        linear_to_srgb_u8_slice(&empty_linear, &mut empty_codes);
        assert!(empty_codes.is_empty());
    }

    /// Lengths that are not multiples of 8 exercise the scalar tail.
    #[test]
    fn test_non_multiple_of_8() {
        for len in [1, 3, 7, 9, 15, 17, 100] {
            let mut values: Vec<f32> = (0..len).map(|i| i as f32 / len as f32).collect();
            let expected: Vec<f32> = values.iter().map(|&v| crate::srgb_to_linear(v)).collect();

            srgb_to_linear_slice(&mut values);

            for (i, (&got, &exp)) in values.iter().zip(expected.iter()).enumerate() {
                assert!(
                    (got - exp).abs() < 1e-5,
                    "len={} mismatch at {}: got {}, expected {}",
                    len,
                    i,
                    got,
                    exp
                );
            }
        }
    }
}