jxl_encoder_simd/
lib.rs

1// Copyright (c) Imazen LLC and the JPEG XL Project Authors.
2// Algorithms and constants derived from libjxl (BSD-3-Clause).
3// Licensed under AGPL-3.0-or-later. Commercial licenses at https://www.imazen.io/pricing
4
5//! SIMD-accelerated primitives for jxl_encoder.
6//!
7//! This crate wraps platform-specific SIMD intrinsics behind safe public functions.
8//! The main encoder crate (`jxl_encoder`) maintains `#![forbid(unsafe_code)]` and
9//! calls into these safe wrappers.
10//!
11//! Uses [archmage](https://docs.rs/archmage) for token-based SIMD dispatch
12//! and [magetypes](https://docs.rs/magetypes) for cross-platform vector types.
13//!
14//! # Direct variant access
15//!
16//! Kernels are available in the following forms:
17//! - A dispatching function (e.g. `dct_8x8`) that picks the best at runtime
18//! - Concrete `_avx2(token, ...)` / `_neon(token, ...)` / `_scalar(...)` variants
19//!
20//! For hot loops, callers should summon a token once, then call the concrete
21//! variant directly from an `#[arcane]` function so LLVM can inline across the
22//! target-feature boundary.
23
24#![forbid(unsafe_code)]
25#![no_std]
26extern crate alloc;
27
28mod block_l2;
29mod dct16;
30mod dct8;
31mod dequant;
32mod entropy;
33mod epf;
34mod gab;
35mod gaborish5x5;
36mod idct16;
37mod mask1x1;
38mod pixel_loss;
39mod quantize;
40mod transpose;
41mod xyb;
42
43// Re-export archmage token types so callers don't need a direct archmage dependency
44#[cfg(target_arch = "aarch64")]
45pub use archmage::NeonToken;
46pub use archmage::SimdToken;
47#[cfg(target_arch = "x86_64")]
48pub use archmage::X64V3Token;
49
50// --- Dispatching functions (runtime auto-select) ---
51
52pub use block_l2::compute_block_l2_errors;
53pub use dct8::{dct_8x8, idct_8x8};
54pub use dct16::{dct_8x16, dct_16x8, dct_16x16};
55pub use dequant::dequant_block_dct8;
56pub use entropy::{EntropyCoeffResult, entropy_estimate_coeffs};
57pub use epf::{epf_step1, epf_step2};
58pub use gab::gab_smooth_channel;
59pub use gaborish5x5::gaborish_5x5_channel;
60pub use idct16::{idct_8x16, idct_16x8, idct_16x16};
61pub use mask1x1::compute_mask1x1;
62pub use pixel_loss::pixel_domain_loss;
63pub use quantize::quantize_block_dct8;
64pub use transpose::transpose_8x8;
65pub use xyb::{linear_rgb_to_xyb_batch, xyb_to_linear_rgb_batch, xyb_to_linear_rgb_planar};
66
67// --- Scalar variants (no token needed) ---
68
69pub use block_l2::compute_block_l2_errors_scalar;
70pub use dct8::{dct_8x8_scalar, idct_8x8_scalar};
71pub use dct16::{dct_8x16_scalar, dct_16x8_scalar, dct_16x16_scalar};
72pub use dequant::dequant_dct8_scalar;
73pub use entropy::entropy_coeffs_scalar;
74pub use epf::{epf_step1_scalar, epf_step2_scalar};
75pub use gab::gab_smooth_scalar;
76pub use gaborish5x5::gaborish_5x5_scalar;
77pub use idct16::{idct_8x16_scalar, idct_16x8_scalar, idct_16x16_scalar};
78pub use mask1x1::compute_mask1x1_scalar;
79pub use pixel_loss::pixel_domain_loss_scalar;
80pub use quantize::quantize_dct8_scalar;
81// `transpose_8x8` has no separate `_scalar` export: the dispatching function itself is the scalar fallback
82pub use xyb::{forward_xyb_scalar, inverse_xyb_planar_scalar, inverse_xyb_scalar};
83
84// --- AVX2 variants (require X64V3Token) ---
85
86#[cfg(target_arch = "x86_64")]
87pub use block_l2::compute_block_l2_errors_avx2;
88#[cfg(target_arch = "x86_64")]
89pub use dct8::{dct_8x8_avx2, idct_8x8_avx2};
90#[cfg(target_arch = "x86_64")]
91pub use dct16::{dct_8x16_avx2, dct_16x8_avx2, dct_16x16_avx2};
92#[cfg(target_arch = "x86_64")]
93pub use dequant::dequant_dct8_avx2;
94#[cfg(target_arch = "x86_64")]
95pub use entropy::entropy_coeffs_avx2;
96#[cfg(target_arch = "x86_64")]
97pub use epf::{epf_step1_avx2, epf_step2_avx2};
98#[cfg(target_arch = "x86_64")]
99pub use gab::gab_smooth_avx2;
100#[cfg(target_arch = "x86_64")]
101pub use gaborish5x5::gaborish_5x5_avx2;
102#[cfg(target_arch = "x86_64")]
103pub use idct16::{idct_8x16_avx2, idct_16x8_avx2, idct_16x16_avx2};
104#[cfg(target_arch = "x86_64")]
105pub use mask1x1::compute_mask1x1_avx2;
106#[cfg(target_arch = "x86_64")]
107pub use pixel_loss::pixel_domain_loss_avx2;
108#[cfg(target_arch = "x86_64")]
109pub use quantize::quantize_dct8_avx2;
110#[cfg(target_arch = "x86_64")]
111pub use transpose::transpose_8x8_avx2;
112#[cfg(target_arch = "x86_64")]
113pub use xyb::{forward_xyb_avx2, inverse_xyb_avx2, inverse_xyb_planar_avx2};
114
115// --- NEON variants (require NeonToken) ---
116
117#[cfg(target_arch = "aarch64")]
118pub use block_l2::compute_block_l2_errors_neon;
119#[cfg(target_arch = "aarch64")]
120pub use dct8::{dct_8x8_neon, idct_8x8_neon};
121#[cfg(target_arch = "aarch64")]
122pub use dct16::{dct_8x16_neon, dct_16x8_neon, dct_16x16_neon};
123#[cfg(target_arch = "aarch64")]
124pub use dequant::dequant_dct8_neon;
125#[cfg(target_arch = "aarch64")]
126pub use entropy::entropy_coeffs_neon;
127#[cfg(target_arch = "aarch64")]
128pub use epf::{epf_step1_neon, epf_step2_neon};
129#[cfg(target_arch = "aarch64")]
130pub use gab::gab_smooth_neon;
131#[cfg(target_arch = "aarch64")]
132pub use gaborish5x5::gaborish_5x5_neon;
133#[cfg(target_arch = "aarch64")]
134pub use idct16::{idct_8x16_neon, idct_16x8_neon, idct_16x16_neon};
135#[cfg(target_arch = "aarch64")]
136pub use mask1x1::compute_mask1x1_neon;
137#[cfg(target_arch = "aarch64")]
138pub use pixel_loss::pixel_domain_loss_neon;
139#[cfg(target_arch = "aarch64")]
140pub use quantize::quantize_dct8_neon;
141#[cfg(target_arch = "aarch64")]
142pub use transpose::transpose_8x8_neon;
143#[cfg(target_arch = "aarch64")]
144pub use xyb::{forward_xyb_neon, inverse_xyb_neon, inverse_xyb_planar_neon};
jxl_encoder_simd/lib.rs

jxl_encoder_simd/
lib.rs