linear_srgb/
lib.rs

1//! Fast linear↔sRGB color space conversion.
2//!
3//! This crate provides efficient conversion between linear light values and
4//! sRGB gamma-encoded values, with multiple implementation strategies for
5//! different accuracy/performance tradeoffs.
6//!
7//! # Module Organization
8//!
9//! - [`default`] — **Start here.** Rational polynomial for f32, LUT for integers, SIMD for slices.
10//! - [`precise`] — Exact `powf()` with C0-continuous constants. f32/f64, extended range. Slower.
11//! - [`tokens`] — Inlineable `#[rite]` functions for embedding in your own `#[arcane]` SIMD code.
12//! - [`lut`] — Lookup tables for custom bit depths (10-bit, 12-bit, 16-bit).
13//! - **`tf`** — Transfer functions beyond sRGB: BT.709, PQ, HLG. Requires `transfer` feature.
14//! - **`iec`** — IEC 61966-2-1 textbook constants (legacy interop). Requires `iec` feature.
15//!
16//! # Quick Start
17//!
18//! ```rust
19//! use linear_srgb::default::{srgb_to_linear, linear_to_srgb};
20//!
21//! // Convert sRGB 0.5 to linear
22//! let linear = srgb_to_linear(0.5);
23//! assert!((linear - 0.214).abs() < 0.001);
24//!
25//! // Convert back to sRGB
26//! let srgb = linear_to_srgb(linear);
27//! assert!((srgb - 0.5).abs() < 0.001);
28//! ```
29//!
30//! # Batch Processing (SIMD)
31//!
32//! For maximum throughput on slices:
33//!
34//! ```rust
35//! use linear_srgb::default::{srgb_to_linear_slice, linear_to_srgb_slice};
36//!
37//! let mut values = vec![0.5f32; 10000];
38//! srgb_to_linear_slice(&mut values);  // SIMD-accelerated
39//! linear_to_srgb_slice(&mut values);
40//! ```
41//!
42//! # Custom Gamma
43//!
44//! For non-sRGB gamma (pure power function without linear segment):
45//!
46//! ```rust
47//! use linear_srgb::default::{gamma_to_linear, linear_to_gamma};
48//!
49//! let linear = gamma_to_linear(0.5, 2.2);  // gamma 2.2
50//! let encoded = linear_to_gamma(linear, 2.2);
51//! ```
52//!
53//! # LUT-based Conversion
54//!
55//! For batch processing with pre-computed lookup tables:
56//!
57//! ```rust
58//! use linear_srgb::default::SrgbConverter;
59//!
60//! let conv = SrgbConverter::new();  // Zero-cost, const tables
61//!
62//! // Fast 8-bit conversions
63//! let linear = conv.srgb_u8_to_linear(128);
64//! let srgb = conv.linear_to_srgb_u8(linear);
65//! ```
66//!
67//! # Choosing the Right API
68//!
69//! | Use Case | Recommended Function |
70//! |----------|---------------------|
71//! | Single f32 value | [`default::srgb_to_linear`] |
72//! | Single u8 value | [`default::srgb_u8_to_linear`] |
73//! | f32 slice (in-place) | [`default::srgb_to_linear_slice`] |
74//! | RGBA f32 slice (alpha-preserving) | [`default::srgb_to_linear_rgba_slice`] |
75//! | u8 slice → f32 slice | [`default::srgb_u8_to_linear_slice`] |
76//! | RGBA u8 → f32 (alpha-preserving) | [`default::srgb_u8_to_linear_rgba_slice`] |
77//! | RGBA f32 sRGB → linear premul | [`default::srgb_to_linear_premultiply_rgba_slice`] |
78//! | RGBA u8 sRGB → linear premul f32 | [`default::srgb_u8_to_linear_premultiply_rgba_slice`] |
79//! | RGBA f32 linear premul → sRGB | [`default::unpremultiply_linear_to_srgb_rgba_slice`] |
80//! | RGBA f32 linear premul → sRGB u8 | [`default::unpremultiply_linear_to_srgb_u8_rgba_slice`] |
81//! | u16 → f32 slice | [`default::srgb_u16_to_linear_slice`] |
//! | f32 → u16 (exact round-trip) | [`default::linear_to_srgb_u16`] |
//! | f32 → u16 (fast, within ±1 on round-trip) | [`default::linear_to_srgb_u16_fast`] |
84//! | Exact f32/f64 (powf) | [`precise::srgb_to_linear`] |
85//! | Extended range (HDR) | [`precise::srgb_to_linear_extended`] |
86//! | Inside `#[arcane]` | `tokens::x8::srgb_to_linear_v3` |
87//! | Custom bit depth LUT | [`lut::LinearTable16`] |
88//!
89//! # Clamping and Extended Range
90//!
91//! The f32↔f32 conversion functions come in two flavors: **clamped** (default)
92//! and **extended** (unclamped). Integer paths (u8, u16) always clamp since
93//! out-of-range values can't be represented in the output format.
94//!
95//! ## Clamped (default) — use for same-gamut pipelines
96//!
97//! All functions except the `_extended` variants clamp inputs to \[0, 1\]:
98//! negatives become 0, values above 1 become 1.
99//!
100//! This is correct whenever the source and destination share the same color
101//! space (gamut + transfer function). The typical pipeline:
102//!
103//! 1. Decode sRGB image (u8 → linear f32 via LUT, or f32 via TRC)
104//! 2. Process in linear light (resize, blur, blend, composite)
105//! 3. Re-encode to sRGB (linear f32 → sRGB f32 or u8)
106//!
107//! In this pipeline, out-of-range values only come from processing artifacts:
108//! resize filters with negative lobes (Lanczos, Mitchell, etc.) produce small
109//! negatives near dark edges and values slightly above 1.0 near bright edges.
110//! These are ringing artifacts, not real colors — clamping is correct.
111//!
112//! Float decoders like jpegli can also produce small out-of-range values from
113//! YCbCr quantization noise. When the image is sRGB, these are compression
114//! artifacts and clamping is correct — gives the same result as decoding to
115//! u8 first.
116//!
117//! ## Extended (unclamped) — use for cross-gamut pipelines
118//!
119//! [`precise::srgb_to_linear_extended`] and [`precise::linear_to_srgb_extended`]
120//! do not clamp. They follow the mathematical sRGB transfer function for all
121//! inputs: negatives pass through the linear segment, values above 1.0 pass
122//! through the power segment.
123//!
124//! Use these when the sRGB transfer function is applied to values from a
125//! **different, wider gamut**. A 3×3 matrix converting Rec. 2020 linear or
126//! Display P3 linear to sRGB linear can produce values well outside \[0, 1\]:
127//! a saturated Rec. 2020 green maps to deeply negative sRGB red and blue.
128//! These are real out-of-gamut colors, not artifacts — clamping destroys
129//! information that downstream gamut mapping or compositing may need.
130//!
131//! This matters in practice: JPEG and JPEG XL images can carry Rec. 2020 or
132//! Display P3 ICC profiles. Phones shoot Rec. 2020 HLG, cameras embed
133//! wide-gamut profiles. Decoding such an image and converting to sRGB for
134//! display produces out-of-gamut values that should survive until final
135//! output.
136//!
137//! If a float decoder (jpegli, libjxl) outputs wide-gamut data directly to
138//! f32, the output contains both small compression artifacts and real
139//! out-of-gamut values. The artifacts are tiny; the gamut excursions
140//! dominate. Using `_extended` preserves both — the artifacts are harmless
141//! noise that vanishes at quantization.
142//!
143//! The `_extended` variants also cover **scRGB** (float sRGB with values
144//! outside \[0, 1\] for HDR and wide color) and any pipeline where
145//! intermediate f32 values are not yet at the final output stage.
146//!
147//! ## Summary
148//!
149//! | Function | Range | Pipeline |
150//! |----------|-------|----------|
151//! | All `default::*_slice`, `tokens::*`, `lut::*` | \[0, 1\] | Same-gamut batch processing |
152//! | [`default::srgb_to_linear`] | \[0, 1\] | Same-gamut single values |
153//! | [`default::linear_to_srgb`] | \[0, 1\] | Same-gamut single values |
154//! | [`precise::srgb_to_linear_extended`] | Unbounded | Cross-gamut, scRGB, HDR |
155//! | [`precise::linear_to_srgb_extended`] | Unbounded | Cross-gamut, scRGB, HDR |
156//! | All u8/u16 paths | \[0, 1\] | Final quantization (clamp inherent) |
157//!
158//! **No SIMD extended-range variants exist yet.** The fast polynomial
159//! approximation is fitted to \[0, 1\] and produces garbage outside that
160//! domain. Extended-range SIMD would use `pow` instead of the polynomial
161//! (~3× slower, still faster than scalar for `linear_to_srgb`). For batch
162//! extended-range conversion today, loop over the [`precise`] `_extended`
163//! functions.
164//!
165//! # Feature Flags
166//!
167//! - **`std`** (default) — Enable runtime SIMD dispatch. Required for slice functions.
168//! - **`avx512`** (default) — Enable AVX-512 code paths and `tokens::x16` module.
169//! - **`transfer`** — BT.709, PQ, and HLG transfer functions in `tf` and [`tokens`].
170//! - **`iec`** — IEC 61966-2-1 textbook sRGB functions for legacy interop.
171//! - **`alt`** — Alternative implementations for benchmarking (not stable API).
172//! - **`unsafe_simd`** — No-op (kept for backward compatibility, will be removed in 0.7).
173//!
174//! # `no_std` Support
175//!
176//! This crate is `no_std` compatible. Without `std`, u16 functions use the
177//! rational polynomial instead of LUT (slower but no heap allocation).
178//! Disable the `std` feature:
179//!
180//! ```toml
181//! linear-srgb = { version = "0.6", default-features = false }
182//! ```
183
184#![cfg_attr(not(feature = "std"), no_std)]
185#![forbid(unsafe_code)]
186#![warn(missing_docs)]
187
188#[cfg(not(feature = "std"))]
189extern crate alloc;
190
191#[cfg(all(test, not(feature = "std")))]
192extern crate std;
193
194// ============================================================================
195// Public modules
196// ============================================================================
197
198/// Recommended API with optimal implementations for each use case.
199///
200/// Uses a rational polynomial for single f32 values (≤14 ULP, perfectly
201/// monotonic), LUT for integer types, and SIMD-dispatched batch processing
202/// for slices.
203pub mod default;
204
205/// Exact `powf()`-based conversions with C0-continuous constants.
206///
207/// Uses C0-continuous constants (from the moxcms reference implementation) that
208/// eliminate the IEC 61966-2-1 piecewise discontinuity. ~6 ULP max error
209/// vs f64 reference. See the module docs for the constant comparison table.
210///
211/// Also provides f64, extended-range (unclamped), and custom gamma functions.
212/// For faster alternatives, use [`default`].
213pub mod precise;
214
215/// Lookup table types for sRGB conversion.
216///
217/// Provides both build-time const tables ([`SrgbConverter`](lut::SrgbConverter))
218/// and runtime-generated tables for custom bit depths (10-bit, 12-bit, 16-bit).
219pub mod lut;
220
221/// Inlineable `#[rite]` functions for embedding in your own `#[arcane]` code.
222///
223/// These carry `#[target_feature]` + `#[inline]` directly — no wrapper, no
224/// dispatch. When called from a matching `#[arcane]` context, LLVM inlines
225/// them fully. Organized by SIMD width; suffixed by required token tier.
226///
227/// Also re-exports token types for convenience: `X64V3Token`, `X64V4Token`,
228/// `NeonToken`, `Wasm128Token` (each gated to its target architecture).
229///
230/// When the `transfer` feature is enabled, each width module also provides
231/// rites for BT.709, PQ, and HLG (prefixed with `tf_` for sRGB to avoid
232/// name collisions with the rational polynomial sRGB rites).
233pub mod tokens;
234
235/// Transfer functions: sRGB, BT.709, PQ (ST 2084), HLG (ARIB STD-B67).
236///
237/// Provides scalar functions for all four transfer curves. SIMD `#[rite]`
238/// versions live in [`tokens`] (x4/x8/x16).
239///
240/// Requires the `transfer` feature.
241#[cfg(feature = "transfer")]
242pub mod tf;
243
244/// IEC 61966-2-1:1999 textbook sRGB transfer functions.
245///
246/// Provides the original specification constants (threshold 0.04045, offset 0.055)
247/// for interoperability with software that implements IEC 61966-2-1 verbatim.
248/// The default module uses C0-continuous constants that eliminate the spec's
249/// ~2.3e-9 piecewise discontinuity.
250///
251/// Requires the `iec` feature.
252#[cfg(feature = "iec")]
253pub mod iec;
254
255// ============================================================================
256// Internal modules
257// ============================================================================
258
259pub(crate) mod scalar;
260pub(crate) mod simd;
261
262mod mlaf;
263
264// Rational polynomial sRGB approximation (shared coefficients + scalar evaluator)
265pub(crate) mod rational_poly;
266
267// Pre-computed const lookup tables (embedded in binary)
268mod const_luts;
269
270// Lazily-initialized u16 sRGB LUTs (OnceLock, allocated on first use)
271#[cfg(feature = "std")]
272#[doc(hidden)]
273pub mod u16_lut;
274
275// Alternative/experimental implementations (for benchmarking, not stable API)
276#[cfg(feature = "alt")]
277#[doc(hidden)]
278pub mod alt;
279
280// ============================================================================
281// Tests
282// ============================================================================
283
#[cfg(test)]
mod tests {
    use crate::default::*;

    #[cfg(not(feature = "std"))]
    use alloc::vec::Vec;

    /// The free-function u8 decoder and the `SrgbConverter` method must agree
    /// (to within 1e-5) for every possible 8-bit code value.
    #[test]
    fn test_api_consistency() {
        let converter = SrgbConverter::new();

        for code in 0..=u8::MAX {
            let via_function = srgb_u8_to_linear(code);
            let via_table = converter.srgb_u8_to_linear(code);
            let delta = (via_function - via_table).abs();

            assert!(
                delta < 1e-5,
                "Mismatch at {}: direct={}, lut={}",
                code,
                via_function,
                via_table
            );
        }
    }

    /// Decoding a slice to linear and re-encoding it to sRGB in place must
    /// reproduce the starting samples (to within 1e-5).
    #[test]
    fn test_slice_conversion() {
        // Eleven evenly spaced samples: 0.0, 0.1, ..., 1.0.
        let expected: Vec<f32> = (0..=10).map(|step| step as f32 / 10.0).collect();
        let mut buffer = expected.clone();

        // Round-trip in place: sRGB -> linear -> sRGB.
        srgb_to_linear_slice(&mut buffer);
        linear_to_srgb_slice(&mut buffer);

        let pairs = expected.iter().zip(buffer.iter());
        for (index, (before, after)) in pairs.enumerate() {
            let drift = (before - after).abs();

            assert!(
                drift < 1e-5,
                "Slice roundtrip failed at {}: {} -> {}",
                index,
                before,
                after
            );
        }
    }
}
linear_srgb/lib.rs

linear_srgb/
lib.rs