linear_srgb/
lib.rs

1//! Fast linear↔sRGB color space conversion.
2//!
3//! This crate provides efficient conversion between linear light values and
4//! sRGB gamma-encoded values, with multiple implementation strategies for
5//! different accuracy/performance tradeoffs.
6//!
7//! # Module Organization
8//!
9//! - [`default`] — **Start here.** Rational polynomial for f32, LUT for integers, SIMD for slices.
10//! - [`precise`] — Exact `powf()` with C0-continuous constants. f32/f64, extended range. Slower.
11//! - [`tokens`] — Inlineable `#[rite]` functions for embedding in your own `#[arcane]` SIMD code.
12//! - [`lut`] — Lookup tables for custom bit depths (10-bit, 12-bit, 16-bit).
13//! - **`tf`** — Transfer functions beyond sRGB: BT.709, PQ, HLG. Requires `transfer` feature.
14//!
15//! # Quick Start
16//!
17//! ```rust
18//! use linear_srgb::default::{srgb_to_linear, linear_to_srgb};
19//!
20//! // Convert sRGB 0.5 to linear
21//! let linear = srgb_to_linear(0.5);
22//! assert!((linear - 0.214).abs() < 0.001);
23//!
24//! // Convert back to sRGB
25//! let srgb = linear_to_srgb(linear);
26//! assert!((srgb - 0.5).abs() < 0.001);
27//! ```
28//!
29//! # Batch Processing (SIMD)
30//!
31//! For maximum throughput on slices:
32//!
33//! ```rust
34//! use linear_srgb::default::{srgb_to_linear_slice, linear_to_srgb_slice};
35//!
36//! let mut values = vec![0.5f32; 10000];
37//! srgb_to_linear_slice(&mut values);  // SIMD-accelerated
38//! linear_to_srgb_slice(&mut values);
39//! ```
40//!
41//! # Custom Gamma
42//!
43//! For non-sRGB gamma (pure power function without linear segment):
44//!
45//! ```rust
46//! use linear_srgb::default::{gamma_to_linear, linear_to_gamma};
47//!
48//! let linear = gamma_to_linear(0.5, 2.2);  // gamma 2.2
49//! let encoded = linear_to_gamma(linear, 2.2);
50//! ```
51//!
52//! # LUT-based Conversion
53//!
54//! For batch processing with pre-computed lookup tables:
55//!
56//! ```rust
57//! use linear_srgb::default::SrgbConverter;
58//!
59//! let conv = SrgbConverter::new();  // Zero-cost, const tables
60//!
61//! // Fast 8-bit conversions
62//! let linear = conv.srgb_u8_to_linear(128);
63//! let srgb = conv.linear_to_srgb_u8(linear);
64//! ```
65//!
66//! # Choosing the Right API
67//!
68//! | Use Case | Recommended Function |
69//! |----------|---------------------|
70//! | Single f32 value | [`default::srgb_to_linear`] |
71//! | Single u8 value | [`default::srgb_u8_to_linear`] |
72//! | f32 slice (in-place) | [`default::srgb_to_linear_slice`] |
73//! | RGBA f32 slice (alpha-preserving) | [`default::srgb_to_linear_rgba_slice`] |
74//! | u8 slice → f32 slice | [`default::srgb_u8_to_linear_slice`] |
75//! | RGBA u8 → f32 (alpha-preserving) | [`default::srgb_u8_to_linear_rgba_slice`] |
76//! | u16 slice → f32 slice | [`default::srgb_u16_to_linear_slice`] |
77//! | Exact f32/f64 (powf) | [`precise::srgb_to_linear`] |
78//! | Extended range (HDR) | [`precise::srgb_to_linear_extended`] |
79//! | Inside `#[arcane]` | `tokens::x8::srgb_to_linear_v3` |
80//! | Custom bit depth LUT | [`lut::LinearTable16`] |
81//!
82//! # Clamping and Extended Range
83//!
84//! The f32↔f32 conversion functions come in two flavors: **clamped** (default)
85//! and **extended** (unclamped). Integer paths (u8, u16) always clamp since
86//! out-of-range values can't be represented in the output format.
87//!
88//! ## Clamped (default) — use for same-gamut pipelines
89//!
90//! All functions except the `_extended` variants clamp inputs to \[0, 1\]:
91//! negatives become 0, values above 1 become 1.
92//!
93//! This is correct whenever the source and destination share the same color
94//! space (gamut + transfer function). The typical pipeline:
95//!
96//! 1. Decode sRGB image (u8 → linear f32 via LUT, or f32 via TRC)
97//! 2. Process in linear light (resize, blur, blend, composite)
98//! 3. Re-encode to sRGB (linear f32 → sRGB f32 or u8)
99//!
100//! In this pipeline, out-of-range values only come from processing artifacts:
101//! resize filters with negative lobes (Lanczos, Mitchell, etc.) produce small
102//! negatives near dark edges and values slightly above 1.0 near bright edges.
103//! These are ringing artifacts, not real colors — clamping is correct.
104//!
//! Float decoders like jpegli can also produce small out-of-range values from
//! YCbCr quantization noise. When the image is sRGB, these are compression
//! artifacts and clamping is correct: it gives the same result as decoding
//! to u8 first.
109//!
110//! ## Extended (unclamped) — use for cross-gamut pipelines
111//!
112//! [`precise::srgb_to_linear_extended`] and [`precise::linear_to_srgb_extended`]
113//! do not clamp. They follow the mathematical sRGB transfer function for all
114//! inputs: negatives pass through the linear segment, values above 1.0 pass
115//! through the power segment.
116//!
117//! Use these when the sRGB transfer function is applied to values from a
118//! **different, wider gamut**. A 3×3 matrix converting Rec. 2020 linear or
119//! Display P3 linear to sRGB linear can produce values well outside \[0, 1\]:
120//! a saturated Rec. 2020 green maps to deeply negative sRGB red and blue.
121//! These are real out-of-gamut colors, not artifacts — clamping destroys
122//! information that downstream gamut mapping or compositing may need.
123//!
124//! This matters in practice: JPEG and JPEG XL images can carry Rec. 2020 or
125//! Display P3 ICC profiles. Phones shoot Rec. 2020 HLG, cameras embed
126//! wide-gamut profiles. Decoding such an image and converting to sRGB for
127//! display produces out-of-gamut values that should survive until final
128//! output.
129//!
130//! If a float decoder (jpegli, libjxl) outputs wide-gamut data directly to
131//! f32, the output contains both small compression artifacts and real
132//! out-of-gamut values. The artifacts are tiny; the gamut excursions
133//! dominate. Using `_extended` preserves both — the artifacts are harmless
134//! noise that vanishes at quantization.
135//!
136//! The `_extended` variants also cover **scRGB** (float sRGB with values
137//! outside \[0, 1\] for HDR and wide color) and any pipeline where
138//! intermediate f32 values are not yet at the final output stage.
139//!
140//! ## Summary
141//!
142//! | Function | Range | Pipeline |
143//! |----------|-------|----------|
144//! | All `default::*_slice`, `tokens::*`, `lut::*` | \[0, 1\] | Same-gamut batch processing |
145//! | [`default::srgb_to_linear`] | \[0, 1\] | Same-gamut single values |
146//! | [`default::linear_to_srgb`] | \[0, 1\] | Same-gamut single values |
147//! | [`precise::srgb_to_linear_extended`] | Unbounded | Cross-gamut, scRGB, HDR |
148//! | [`precise::linear_to_srgb_extended`] | Unbounded | Cross-gamut, scRGB, HDR |
149//! | All u8/u16 paths | \[0, 1\] | Final quantization (clamp inherent) |
150//!
151//! **No SIMD extended-range variants exist yet.** The fast polynomial
152//! approximation is fitted to \[0, 1\] and produces garbage outside that
153//! domain. Extended-range SIMD would use `pow` instead of the polynomial
154//! (~3× slower, still faster than scalar for `linear_to_srgb`). For batch
155//! extended-range conversion today, loop over the [`precise`] `_extended`
156//! functions.
157//!
158//! # Feature Flags
159//!
160//! - **`std`** (default) — Enable runtime SIMD dispatch. Required for slice functions.
161//! - **`avx512`** (default) — Enable AVX-512 code paths and `tokens::x16` module.
162//! - **`transfer`** — BT.709, PQ, and HLG transfer functions in `tf` and [`tokens`].
163//! - **`alt`** — Alternative implementations for benchmarking (not stable API).
164//! - **`unsafe_simd`** — Union-based bit manipulation in SIMD paths.
165//!
166//! # `no_std` Support
167//!
168//! This crate is `no_std` compatible (requires `alloc` for LUT generation).
169//! Disable the `std` feature:
170//!
171//! ```toml
172//! linear-srgb = { version = "0.6", default-features = false }
173//! ```
174
175#![cfg_attr(not(feature = "std"), no_std)]
176#![cfg_attr(not(feature = "unsafe_simd"), deny(unsafe_code))]
177#![warn(missing_docs)]
178
179#[cfg(not(feature = "std"))]
180extern crate alloc;
181
182#[cfg(all(test, not(feature = "std")))]
183extern crate std;
184
185// ============================================================================
186// Public modules
187// ============================================================================
188
189/// Recommended API with optimal implementations for each use case.
190///
191/// Uses a libjxl rational polynomial for single f32 values (~110 ULP max at
192/// the piecewise threshold, <8 ULP elsewhere), LUT for integer types, and
193/// SIMD-dispatched batch processing for slices.
194pub mod default;
195
196/// Exact `powf()`-based conversions with C0-continuous constants.
197///
198/// Uses adjusted constants (from the moxcms reference implementation) that
199/// eliminate the IEC 61966-2-1 piecewise discontinuity. ~6 ULP max error
200/// vs f64 reference. See the module docs for the constant comparison table.
201///
202/// Also provides f64, extended-range (unclamped), and custom gamma functions.
203/// For faster alternatives, use [`default`].
204pub mod precise;
205
206/// Lookup table types for sRGB conversion.
207///
208/// Provides both build-time const tables ([`SrgbConverter`](lut::SrgbConverter))
209/// and runtime-generated tables for custom bit depths (10-bit, 12-bit, 16-bit).
210pub mod lut;
211
212/// Inlineable `#[rite]` functions for embedding in your own `#[arcane]` code.
213///
214/// These carry `#[target_feature]` + `#[inline]` directly — no wrapper, no
215/// dispatch. When called from a matching `#[arcane]` context, LLVM inlines
216/// them fully. Organized by SIMD width; suffixed by required token tier.
217///
218/// Also re-exports token types for convenience: `X64V3Token`, `X64V4Token`,
219/// `NeonToken`, `Wasm128Token` (each gated to its target architecture).
220///
/// When the `transfer` feature is enabled, each width module also provides
/// rites for BT.709, PQ, and HLG, plus `tf_`-prefixed sRGB rites (the prefix
/// avoids name collisions with the rational polynomial sRGB rites).
224pub mod tokens;
225
226/// Transfer functions: sRGB, BT.709, PQ (ST 2084), HLG (ARIB STD-B67).
227///
228/// Provides scalar functions for all four transfer curves. SIMD `#[rite]`
229/// versions live in [`tokens`] (x4/x8/x16).
230///
231/// Requires the `transfer` feature.
232#[cfg(feature = "transfer")]
233pub mod tf;
234
235// ============================================================================
236// Internal modules
237// ============================================================================
238
239pub(crate) mod scalar;
240pub(crate) mod simd;
241
242mod mlaf;
243
244// Rational polynomial sRGB approximation (shared coefficients + scalar evaluator)
245pub(crate) mod rational_poly;
246
247// Pre-computed const lookup tables (embedded in binary)
248mod const_luts;
249mod const_luts_u16;
250
251// Alternative/experimental implementations (for benchmarking, not stable API)
252#[cfg(feature = "alt")]
253#[doc(hidden)]
254pub mod alt;
255
256// ============================================================================
257// Tests
258// ============================================================================
259
#[cfg(test)]
mod tests {
    use crate::default::*;

    #[cfg(not(feature = "std"))]
    use alloc::vec::Vec;

    /// The free function and the `SrgbConverter` method must agree (within
    /// 1e-5) for every possible 8-bit sRGB code value.
    #[test]
    fn test_api_consistency() {
        let converter = SrgbConverter::new();

        for code in u8::MIN..=u8::MAX {
            let via_function = srgb_u8_to_linear(code);
            let via_table = converter.srgb_u8_to_linear(code);
            let gap = (via_function - via_table).abs();

            assert!(
                gap < 1e-5,
                "Mismatch at {}: direct={}, lut={}",
                code,
                via_function,
                via_table
            );
        }
    }

    /// Decoding a slice to linear and re-encoding it to sRGB must reproduce
    /// the input (within 1e-5) for eleven evenly spaced samples in [0, 1].
    #[test]
    fn test_slice_conversion() {
        // Reference samples: 0.0, 0.1, ..., 1.0.
        let original: Vec<f32> = (0..=10).map(|step| step as f32 / 10.0).collect();

        // Work on a copy so the reference stays untouched for comparison.
        let mut roundtripped = original.clone();
        srgb_to_linear_slice(&mut roundtripped);
        linear_to_srgb_slice(&mut roundtripped);

        for (index, (before, after)) in original.iter().zip(&roundtripped).enumerate() {
            let drift = (before - after).abs();

            assert!(
                drift < 1e-5,
                "Slice roundtrip failed at {}: {} -> {}",
                index,
                before,
                after
            );
        }
    }
}
linear_srgb/lib.rs

linear_srgb/
lib.rs