use core::fmt;
use crate::dct53_2d::Dwt53TwoDimensional;
use crate::dct97_2d::Dwt97TwoDimensional;
use crate::dct_grid::validate_dct_block_grid;
use crate::reversible53::{
reversible_lift_53_high_at, reversible_lift_53_i32, reversible_lift_53_low_at,
};
pub use j2k::{
EncodedHtJ2kCodeBlock, IrreversibleQuantizationSubbandScales, J2kSubBandType,
PreencodedHtj2k97CodeBlock, PreencodedHtj2k97CompactCodeBlock,
PreencodedHtj2k97CompactComponent, PreencodedHtj2k97CompactImage,
PreencodedHtj2k97CompactResolution, PreencodedHtj2k97CompactSubband,
PreencodedHtj2k97Component, PreencodedHtj2k97Resolution, PreencodedHtj2k97Subband,
PrequantizedHtj2k97CodeBlock, PrequantizedHtj2k97Component, PrequantizedHtj2k97Image,
PrequantizedHtj2k97Resolution, PrequantizedHtj2k97Subband,
};
use j2k_jpeg::transcode::idct_islow_block;
use rayon::prelude::*;
/// Error message returned by `validate_reversible_grid` whenever
/// `validate_dct_block_grid` rejects the geometry of a reversible 5/3 job.
const REVERSIBLE_DWT53_UNSUPPORTED_GRID: &str =
"reversible DCT 5/3 job has unsupported grid geometry";
/// Input for the first-level reversible 5/3 wavelet stage: a borrowed grid of
/// dequantized DCT blocks plus the geometry needed to address it.
#[derive(Debug, Clone, Copy)]
pub struct DctGridToReversibleDwt53Job<'a> {
/// One 64-coefficient block per grid cell. The Rayon path in this file
/// hands each block to `idct_islow_block`.
pub dequantized_blocks: &'a [[i16; 64]],
/// Number of blocks per grid row; used as the row stride when a block is
/// looked up from a sample coordinate.
pub block_cols: usize,
/// Number of block rows in the grid.
pub block_rows: usize,
/// Component width in samples.
pub width: usize,
/// Component height in samples.
pub height: usize,
}
/// The four subbands produced by one level of the reversible 5/3 transform,
/// each stored as a flat vector built by appending one output row after another.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReversibleDwt53FirstLevel {
/// Horizontal-low part of the vertically low-passed rows
/// (`low_width` values per row, `low_height` rows).
pub ll: Vec<i32>,
/// Horizontal-high part of the vertically low-passed rows
/// (`high_width` values per row, `low_height` rows).
pub hl: Vec<i32>,
/// Horizontal-low part of the vertically high-passed rows
/// (`low_width` values per row, `high_height` rows).
pub lh: Vec<i32>,
/// Horizontal-high part of the vertically high-passed rows
/// (`high_width` values per row, `high_height` rows).
pub hh: Vec<i32>,
/// `width.div_ceil(2)` of the transformed component.
pub low_width: usize,
/// `height.div_ceil(2)` of the transformed component.
pub low_height: usize,
/// `width / 2` of the transformed component.
pub high_width: usize,
/// `height / 2` of the transformed component.
pub high_height: usize,
}
/// Job description passed to `DctToWaveletStageAccelerator::dct_grid_to_dwt53`:
/// a borrowed grid of 8x8 `f64` blocks plus its geometry.
#[derive(Debug, Clone, Copy)]
pub struct DctGridToDwt53Job<'a> {
/// 8x8 floating-point blocks.
/// NOTE(review): presumably DCT coefficients in row-major grid order — confirm with the producer.
pub blocks: &'a [[[f64; 8]; 8]],
/// Number of blocks per grid row.
pub block_cols: usize,
/// Number of block rows in the grid.
pub block_rows: usize,
/// Component width (presumably in samples — confirm).
pub width: usize,
/// Component height (presumably in samples — confirm).
pub height: usize,
}
/// Job description passed to `DctToWaveletStageAccelerator::dct_grid_to_dwt97`
/// and its batch variant: a borrowed grid of 8x8 `f64` blocks plus its geometry.
#[derive(Debug, Clone, Copy)]
pub struct DctGridToDwt97Job<'a> {
/// 8x8 floating-point blocks.
/// NOTE(review): presumably DCT coefficients in row-major grid order — confirm with the producer.
pub blocks: &'a [[[f64; 8]; 8]],
/// Number of blocks per grid row.
pub block_cols: usize,
/// Number of block rows in the grid.
pub block_rows: usize,
/// Component width (presumably in samples — confirm).
pub width: usize,
/// Component height (presumably in samples — confirm).
pub height: usize,
}
/// Job description for the HTJ2K 9/7 code-block batch entry points of
/// `DctToWaveletStageAccelerator` that take `f64` blocks.
#[derive(Debug, Clone, Copy)]
pub struct DctGridToHtj2k97CodeBlockJob<'a> {
/// 8x8 floating-point blocks.
/// NOTE(review): presumably DCT coefficients in row-major grid order — confirm with the producer.
pub blocks: &'a [[[f64; 8]; 8]],
/// Number of blocks per grid row.
pub block_cols: usize,
/// Number of block rows in the grid.
pub block_rows: usize,
/// Component width (presumably in samples — confirm).
pub width: usize,
/// Component height (presumably in samples — confirm).
pub height: usize,
/// NOTE(review): looks like the horizontal component sub-sampling factor
/// (JPEG 2000 `XRsiz`) — confirm against the encoder.
pub x_rsiz: u8,
/// NOTE(review): looks like the vertical component sub-sampling factor
/// (JPEG 2000 `YRsiz`) — confirm against the encoder.
pub y_rsiz: u8,
}
/// Job description for the HTJ2K 9/7 entry points of
/// `DctToWaveletStageAccelerator` that take `i16` blocks.
#[derive(Debug, Clone, Copy)]
pub struct DctGridI16ToHtj2k97CodeBlockJob<'a> {
/// One 64-coefficient `i16` block per grid cell.
/// NOTE(review): presumably dequantized DCT coefficients, as the field name says — confirm.
pub dequantized_blocks: &'a [[i16; 64]],
/// Number of blocks per grid row.
pub block_cols: usize,
/// Number of block rows in the grid.
pub block_rows: usize,
/// Component width (presumably in samples — confirm).
pub width: usize,
/// Component height (presumably in samples — confirm).
pub height: usize,
/// NOTE(review): looks like the horizontal component sub-sampling factor
/// (JPEG 2000 `XRsiz`) — confirm against the encoder.
pub x_rsiz: u8,
/// NOTE(review): looks like the vertical component sub-sampling factor
/// (JPEG 2000 `YRsiz`) — confirm against the encoder.
pub y_rsiz: u8,
}
/// A borrowed group of `i16` HTJ2K 9/7 jobs, used by the `*_batch_groups`
/// methods of `DctToWaveletStageAccelerator`.
///
/// `'j` is the lifetime of the job slice; `'a` is the lifetime of the block
/// data each job borrows.
#[derive(Debug, Clone, Copy)]
pub struct DctGridI16ToHtj2k97CodeBlockBatch<'a, 'j> {
/// The jobs that make up this group.
pub jobs: &'j [DctGridI16ToHtj2k97CodeBlockJob<'a>],
}
/// Result of `dct_grid_i16_to_htj2k97_compact_preencoded_batch`: one byte
/// buffer plus per-component descriptors.
#[derive(Debug, Clone)]
pub struct PreencodedHtj2k97CompactBatch {
/// Raw bytes for the batch.
/// NOTE(review): presumably the concatenated code-block data that `components` refers into — confirm.
pub payload: Vec<u8>,
/// Compact component descriptors.
/// NOTE(review): presumably one per input job — confirm.
pub components: Vec<PreencodedHtj2k97CompactComponent>,
}
/// Result of `dct_grid_i16_to_htj2k97_compact_preencoded_batch_groups`: one
/// byte buffer plus component descriptors arranged per group.
#[derive(Debug, Clone)]
pub struct PreencodedHtj2k97CompactBatchGroups {
/// Raw bytes for all groups.
/// NOTE(review): presumably the concatenated code-block data that `groups` refers into — confirm.
pub payload: Vec<u8>,
/// Compact component descriptors, one inner vector per group.
/// NOTE(review): presumably in the same order as the input groups — confirm.
pub groups: Vec<Vec<PreencodedHtj2k97CompactComponent>>,
}
/// Options passed alongside the jobs to every HTJ2K 9/7 code-block entry point
/// of `DctToWaveletStageAccelerator`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Htj2k97CodeBlockOptions {
/// Sample bit depth (presumably of the source component — confirm).
pub bit_depth: u8,
/// Number of guard bits (presumably the JPEG 2000 quantization guard bits — confirm).
pub guard_bits: u8,
/// Code-block width exponent.
/// NOTE(review): whether this is the raw exponent or the offset-by-2 codestream value is not visible here.
pub code_block_width_exp: u8,
/// Code-block height exponent; same caveat as `code_block_width_exp`.
pub code_block_height_exp: u8,
/// Global scale applied to irreversible quantization (exact semantics defined by the backend).
pub irreversible_quantization_scale: f32,
/// Per-subband scales for irreversible quantization (type re-exported from `j2k`).
pub irreversible_quantization_subband_scales: IrreversibleQuantizationSubbandScales,
}
/// Per-stage counters reported by
/// `DctToWaveletStageAccelerator::last_dwt97_batch_stage_timings`.
///
/// NOTE(review): the `_us` suffix suggests microseconds; nothing in this file
/// fills these fields, so confirm the unit against the backend that does.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Dwt97BatchStageTimings {
/// Time spent packing and uploading inputs.
pub pack_upload_us: u128,
/// Time spent in the IDCT + row-lifting stage.
pub idct_row_lift_us: u128,
/// Time spent in the column-lifting stage.
pub column_lift_us: u128,
/// Time spent quantizing and forming code blocks.
pub quantize_codeblock_us: u128,
/// Time spent in HT encoding (presumably the total of the `ht_*` sub-stages — confirm).
pub ht_encode_us: u128,
/// Time spent in the HT encode kernel.
pub ht_kernel_us: u128,
/// Time spent reading back HT status.
pub ht_status_readback_us: u128,
/// Time spent compacting HT output.
pub ht_compact_us: u128,
/// Time spent reading back HT output.
pub ht_output_readback_us: u128,
/// Number of HT code-block dispatches (a count, not a duration).
pub ht_codeblock_dispatches: usize,
/// Time spent in readback.
pub readback_us: u128,
}
/// Failure modes reported by a DCT-to-wavelet stage.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TranscodeStageError {
    /// The request cannot be handled; carries a static explanation.
    Unsupported(&'static str),
    /// A backend reported a failure; carries its message.
    Backend(String),
    /// No accelerator device is available.
    DeviceUnavailable,
}

impl fmt::Display for TranscodeStageError {
    /// Writes the carried reason verbatim, or a fixed message for
    /// [`TranscodeStageError::DeviceUnavailable`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message: &str = match self {
            Self::Unsupported(reason) => *reason,
            Self::Backend(reason) => reason.as_str(),
            Self::DeviceUnavailable => "accelerator device is unavailable",
        };
        f.write_str(message)
    }
}

impl std::error::Error for TranscodeStageError {}

impl From<&'static str> for TranscodeStageError {
    /// Wraps a static reason as [`TranscodeStageError::Unsupported`], which
    /// lets `?` lift the `&'static str` errors used elsewhere in this file.
    fn from(reason: &'static str) -> Self {
        TranscodeStageError::Unsupported(reason)
    }
}
/// Optional acceleration hooks for the DCT-grid to wavelet stages.
///
/// Every method has a default: the `supports_*` queries answer `false` (or
/// delegate to another query), the conversion hooks return `Ok(None)`, and the
/// timing query returns `None`. An implementor overrides only what it provides.
///
/// NOTE(review): `Ok(None)` presumably means "not handled here, use the
/// fallback path" — confirm against the callers.
pub trait DctToWaveletStageAccelerator {
/// Whether `dct_grid_to_dwt97_batch` is implemented. Defaults to `false`.
fn supports_dwt97_batch(&self) -> bool {
false
}
/// Whether the HTJ2K 9/7 code-block batch path is implemented. Defaults to `false`.
fn supports_htj2k97_codeblock_batch(&self) -> bool {
false
}
/// Whether the `i16` pre-encoded HTJ2K 9/7 batch path is implemented. Defaults to `false`.
fn supports_htj2k97_i16_preencoded_batch(&self) -> bool {
false
}
/// Whether the compact pre-encoded batch path is implemented. By default this
/// mirrors `supports_htj2k97_i16_preencoded_batch`.
fn supports_htj2k97_compact_preencoded_batch(&self) -> bool {
self.supports_htj2k97_i16_preencoded_batch()
}
/// Converts one dequantized DCT grid into a first-level reversible 5/3
/// decomposition. Default: `Ok(None)`.
fn dct_grid_to_reversible_dwt53(
&mut self,
_job: DctGridToReversibleDwt53Job<'_>,
) -> Result<Option<ReversibleDwt53FirstLevel>, TranscodeStageError> {
Ok(None)
}
/// Batch form of `dct_grid_to_reversible_dwt53`. Default: `Ok(None)`.
fn dct_grid_to_reversible_dwt53_batch(
&mut self,
_jobs: &[DctGridToReversibleDwt53Job<'_>],
) -> Result<Option<Vec<ReversibleDwt53FirstLevel>>, TranscodeStageError> {
Ok(None)
}
/// Converts one `f64` DCT grid into a 2D 5/3 decomposition. Default: `Ok(None)`.
fn dct_grid_to_dwt53(
&mut self,
_job: DctGridToDwt53Job<'_>,
) -> Result<Option<Dwt53TwoDimensional<f64>>, TranscodeStageError> {
Ok(None)
}
/// Converts one `f64` DCT grid into a 2D 9/7 decomposition. Default: `Ok(None)`.
fn dct_grid_to_dwt97(
&mut self,
_job: DctGridToDwt97Job<'_>,
) -> Result<Option<Dwt97TwoDimensional<f64>>, TranscodeStageError> {
Ok(None)
}
/// Batch form of `dct_grid_to_dwt97`. Default: `Ok(None)`.
fn dct_grid_to_dwt97_batch(
&mut self,
_jobs: &[DctGridToDwt97Job<'_>],
) -> Result<Option<Vec<Dwt97TwoDimensional<f64>>>, TranscodeStageError> {
Ok(None)
}
/// Converts `f64` DCT grids into prequantized HTJ2K 9/7 components.
/// Default: `Ok(None)`.
fn dct_grid_to_htj2k97_codeblock_batch(
&mut self,
_jobs: &[DctGridToHtj2k97CodeBlockJob<'_>],
_options: Htj2k97CodeBlockOptions,
) -> Result<Option<Vec<PrequantizedHtj2k97Component>>, TranscodeStageError> {
Ok(None)
}
/// Converts `f64` DCT grids into pre-encoded HTJ2K 9/7 components.
/// Default: `Ok(None)`.
fn dct_grid_to_htj2k97_preencoded_batch(
&mut self,
_jobs: &[DctGridToHtj2k97CodeBlockJob<'_>],
_options: Htj2k97CodeBlockOptions,
) -> Result<Option<Vec<PreencodedHtj2k97Component>>, TranscodeStageError> {
Ok(None)
}
/// Converts `i16` DCT grids into pre-encoded HTJ2K 9/7 components.
/// Default: `Ok(None)`.
fn dct_grid_i16_to_htj2k97_preencoded_batch(
&mut self,
_jobs: &[DctGridI16ToHtj2k97CodeBlockJob<'_>],
_options: Htj2k97CodeBlockOptions,
) -> Result<Option<Vec<PreencodedHtj2k97Component>>, TranscodeStageError> {
Ok(None)
}
/// Converts `i16` DCT grids into a single compact pre-encoded batch
/// (shared payload plus component descriptors). Default: `Ok(None)`.
fn dct_grid_i16_to_htj2k97_compact_preencoded_batch(
&mut self,
_jobs: &[DctGridI16ToHtj2k97CodeBlockJob<'_>],
_options: Htj2k97CodeBlockOptions,
) -> Result<Option<PreencodedHtj2k97CompactBatch>, TranscodeStageError> {
Ok(None)
}
/// Grouped form of `dct_grid_i16_to_htj2k97_preencoded_batch`: one result
/// vector per input group. Default: `Ok(None)`.
fn dct_grid_i16_to_htj2k97_preencoded_batch_groups(
&mut self,
_groups: &[DctGridI16ToHtj2k97CodeBlockBatch<'_, '_>],
_options: Htj2k97CodeBlockOptions,
) -> Result<Option<Vec<Vec<PreencodedHtj2k97Component>>>, TranscodeStageError> {
Ok(None)
}
/// Grouped form of `dct_grid_i16_to_htj2k97_compact_preencoded_batch`.
/// Default: `Ok(None)`.
fn dct_grid_i16_to_htj2k97_compact_preencoded_batch_groups(
&mut self,
_groups: &[DctGridI16ToHtj2k97CodeBlockBatch<'_, '_>],
_options: Htj2k97CodeBlockOptions,
) -> Result<Option<PreencodedHtj2k97CompactBatchGroups>, TranscodeStageError> {
Ok(None)
}
/// Stage timings of the most recent 9/7 batch, if the implementation
/// records them. Default: `None`.
fn last_dwt97_batch_stage_timings(&self) -> Option<Dwt97BatchStageTimings> {
None
}
}
/// Accelerator that overrides nothing: every `DctToWaveletStageAccelerator`
/// method keeps its default (`false`, `Ok(None)` or `None`).
#[derive(Debug, Default, Clone, Copy)]
pub struct CpuOnlyDctToWaveletStageAccelerator;
// Intentionally empty impl: all behaviour comes from the trait defaults.
impl DctToWaveletStageAccelerator for CpuOnlyDctToWaveletStageAccelerator {}
/// Accelerator that runs the reversible 5/3 first level on the Rayon thread
/// pool and counts how often its entry points were called and succeeded.
#[derive(Debug, Default, Clone)]
pub struct RayonReversibleDwt53Accelerator {
// Single-job calls made, including ones that later failed.
attempts: usize,
// Single-job calls that produced an output.
dispatches: usize,
// Batch calls made, including ones that later failed.
batch_attempts: usize,
// Batch calls in which every job produced an output.
batch_dispatches: usize,
}
/// Read-only access to the call counters.
impl RayonReversibleDwt53Accelerator {
/// Number of single-job reversible 5/3 calls made so far (successful or not).
#[must_use]
pub const fn reversible_dwt53_attempts(&self) -> usize {
self.attempts
}
/// Number of single-job reversible 5/3 calls that returned an output.
#[must_use]
pub const fn reversible_dwt53_dispatches(&self) -> usize {
self.dispatches
}
/// Number of batch reversible 5/3 calls made so far (successful or not).
#[must_use]
pub const fn reversible_dwt53_batch_attempts(&self) -> usize {
self.batch_attempts
}
/// Number of batch reversible 5/3 calls in which every job succeeded.
#[must_use]
pub const fn reversible_dwt53_batch_dispatches(&self) -> usize {
self.batch_dispatches
}
}
impl DctToWaveletStageAccelerator for RayonReversibleDwt53Accelerator {
fn dct_grid_to_reversible_dwt53(
&mut self,
job: DctGridToReversibleDwt53Job<'_>,
) -> Result<Option<ReversibleDwt53FirstLevel>, TranscodeStageError> {
self.attempts = self.attempts.saturating_add(1);
let output = reversible_dwt53_first_level_rayon(job)?;
self.dispatches = self.dispatches.saturating_add(1);
Ok(Some(output))
}
fn dct_grid_to_reversible_dwt53_batch(
&mut self,
jobs: &[DctGridToReversibleDwt53Job<'_>],
) -> Result<Option<Vec<ReversibleDwt53FirstLevel>>, TranscodeStageError> {
self.batch_attempts = self.batch_attempts.saturating_add(1);
let mut output = Vec::with_capacity(jobs.len());
for job in jobs {
output.push(reversible_dwt53_first_level_rayon(*job)?);
}
self.batch_dispatches = self.batch_dispatches.saturating_add(1);
Ok(Some(output))
}
}
/// Runs `idct_islow_block` on every block in parallel and subtracts 128 from
/// each resulting sample, widening it to `i32`.
///
/// The output has one 64-sample array per input block, in input order.
pub fn idct_blocks_to_signed_samples_rayon(blocks: &[[i16; 64]]) -> Vec<[i32; 64]> {
    // Offset removed from every decoded sample (presumably the 8-bit level
    // shift that centres samples on zero — confirm against the decoder).
    const SAMPLE_OFFSET: i32 = 128;

    blocks
        .par_iter()
        .map(|coefficients| {
            idct_islow_block(coefficients).map(|sample| i32::from(sample) - SAMPLE_OFFSET)
        })
        .collect()
}
/// Computes one level of the 2D reversible 5/3 transform directly from
/// 8x8-blocked samples.
///
/// Each output row is produced independently: the vertical filter is evaluated
/// per column straight from the block grid, then the row is lifted
/// horizontally and split. Vertically low-passed rows yield `ll`/`hl`;
/// vertically high-passed rows yield `lh`/`hh`.
///
/// * `block_samples` - one 64-sample block per grid cell; a block is located
///   from a sample coordinate as `(y / 8) * block_cols + x / 8`.
/// * `block_cols`, `block_rows` - grid dimensions in blocks.
/// * `width`, `height` - component dimensions in samples.
///
/// # Errors
/// Returns the "unsupported grid geometry" message when the grid validation
/// rejects the combination of block count, grid size and component size.
pub fn reversible_dwt53_first_level_from_block_samples(
    block_samples: &[[i32; 64]],
    block_cols: usize,
    block_rows: usize,
    width: usize,
    height: usize,
) -> Result<ReversibleDwt53FirstLevel, &'static str> {
    validate_reversible_grid(block_samples.len(), block_cols, block_rows, width, height)?;

    let low_width = width.div_ceil(2);
    let low_height = height.div_ceil(2);
    let high_width = width / 2;
    let high_height = height / 2;

    // Vertically low-passed rows -> LL (horizontal low) and HL (horizontal high).
    let (ll, hl) =
        lift_rows_into_bands_53(low_height, width, low_width, high_width, |x, output_y| {
            vertical_low_53_i32_at(block_samples, block_cols, width, height, x, output_y)
        });
    // Vertically high-passed rows -> LH (horizontal low) and HH (horizontal high).
    let (lh, hh) =
        lift_rows_into_bands_53(high_height, width, low_width, high_width, |x, output_y| {
            vertical_high_53_i32_at(block_samples, block_cols, width, height, x, output_y)
        });

    Ok(ReversibleDwt53FirstLevel {
        ll,
        hl,
        lh,
        hh,
        low_width,
        low_height,
        high_width,
        high_height,
    })
}

/// Builds `row_count` rows of `width` vertically filtered samples in parallel,
/// lifts each row horizontally, and concatenates the even-indexed (low) and
/// odd-indexed (high) results row by row into two flat bands.
fn lift_rows_into_bands_53<F>(
    row_count: usize,
    width: usize,
    low_width: usize,
    high_width: usize,
    vertical_at: F,
) -> (Vec<i32>, Vec<i32>)
where
    F: Fn(usize, usize) -> i32 + Sync,
{
    let rows: Vec<(Vec<i32>, Vec<i32>)> = (0..row_count)
        .into_par_iter()
        .map(|output_y| {
            let mut row: Vec<i32> = (0..width).map(|x| vertical_at(x, output_y)).collect();
            reversible_lift_53_i32(&mut row);
            // The lifted row is interleaved: even slots are low-pass values,
            // odd slots are high-pass values.
            (
                row.iter().step_by(2).copied().collect(),
                row.iter().skip(1).step_by(2).copied().collect(),
            )
        })
        .collect();

    let mut low_band = Vec::with_capacity(low_width * row_count);
    let mut high_band = Vec::with_capacity(high_width * row_count);
    for (low, high) in rows {
        low_band.extend(low);
        high_band.extend(high);
    }
    (low_band, high_band)
}
fn reversible_dwt53_first_level_rayon(
job: DctGridToReversibleDwt53Job<'_>,
) -> Result<ReversibleDwt53FirstLevel, &'static str> {
validate_reversible_grid(
job.dequantized_blocks.len(),
job.block_cols,
job.block_rows,
job.width,
job.height,
)?;
let block_samples = idct_blocks_to_signed_samples_rayon(job.dequantized_blocks);
reversible_dwt53_first_level_from_block_samples(
&block_samples,
job.block_cols,
job.block_rows,
job.width,
job.height,
)
}
/// Checks the job geometry with `validate_dct_block_grid` and replaces
/// whatever error it reports with this module's fixed message.
fn validate_reversible_grid(
    block_count: usize,
    block_cols: usize,
    block_rows: usize,
    width: usize,
    height: usize,
) -> Result<(), &'static str> {
    match validate_dct_block_grid(block_count, block_cols, block_rows, width, height) {
        Ok(()) => Ok(()),
        // The underlying error detail is deliberately discarded.
        Err(_) => Err(REVERSIBLE_DWT53_UNSUPPORTED_GRID),
    }
}
/// Evaluates `reversible_lift_53_low_at` for column `x` at output index
/// `low_idx`, feeding it samples read down that column of the block grid.
fn vertical_low_53_i32_at(
    block_samples: &[[i32; 64]],
    block_cols: usize,
    width: usize,
    height: usize,
    x: usize,
    low_idx: usize,
) -> i32 {
    // Column reader: maps a row index to the sample at (x, row).
    let column_sample =
        |row: usize| component_sample_i32(block_samples, block_cols, width, height, x, row);
    reversible_lift_53_low_at(height, low_idx, column_sample)
}
/// Evaluates `reversible_lift_53_high_at` for column `x` at output index
/// `high_idx`, feeding it samples read down that column of the block grid.
fn vertical_high_53_i32_at(
    block_samples: &[[i32; 64]],
    block_cols: usize,
    width: usize,
    height: usize,
    x: usize,
    high_idx: usize,
) -> i32 {
    // Column reader: maps a row index to the sample at (x, row).
    let column_sample =
        |row: usize| component_sample_i32(block_samples, block_cols, width, height, x, row);
    reversible_lift_53_high_at(height, high_idx, column_sample)
}
/// Reads the sample at component coordinate (`x`, `y`) from a grid of 8x8
/// blocks with `block_cols` blocks per row.
///
/// `width` and `height` are used only for debug-build bounds assertions.
///
/// # Panics
/// Panics if the computed block index lies outside `block_samples`.
fn component_sample_i32(
    block_samples: &[[i32; 64]],
    block_cols: usize,
    width: usize,
    height: usize,
    x: usize,
    y: usize,
) -> i32 {
    const BLOCK_EDGE: usize = 8;

    debug_assert!(x < width);
    debug_assert!(y < height);

    // Split each coordinate into "which block" and "where inside the block".
    let (grid_col, inner_col) = (x / BLOCK_EDGE, x % BLOCK_EDGE);
    let (grid_row, inner_row) = (y / BLOCK_EDGE, y % BLOCK_EDGE);

    let block = &block_samples[grid_row * block_cols + grid_col];
    block[inner_row * BLOCK_EDGE + inner_col]
}
// Ground-truth tests: the lifting code is compared against an independent,
// formula-level reference of the reversible 5/3 transform written below.
#[cfg(test)]
mod ground_truth_tests {
use super::{
reversible_dwt53_first_level_from_block_samples, reversible_lift_53_i32,
ReversibleDwt53FirstLevel,
};
// Floor division (rounds toward negative infinity for the positive divisors used here).
fn floor2(a: i32, b: i32) -> i32 {
a.div_euclid(b)
}
// Maps any index onto 0..n by mirroring about the first and last samples
// without repeating them (period 2 * (n - 1)); a length-1 signal maps to 0.
fn ws_reflect(i: isize, n: usize) -> usize {
if n == 1 {
return 0;
}
let n = isize::try_from(n).unwrap();
let period = 2 * (n - 1);
let mut k = i.rem_euclid(period);
if k >= n {
k = period - k;
}
usize::try_from(k).unwrap()
}
// Reference 1D forward transform computed directly from the closed-form
// expressions on the mirrored signal; returns (low, high).
// Signals shorter than two samples pass through as the low band.
fn ref_53_forward(signal: &[i32]) -> (Vec<i32>, Vec<i32>) {
let n = signal.len();
if n < 2 {
return (signal.to_vec(), Vec::new());
}
let sig = |i: isize| signal[ws_reflect(i, n)];
// detail(m) = x[2m+1] - floor((x[2m] + x[2m+2]) / 2)
let detail = |m: isize| {
let c = 2 * m + 1;
sig(c) - floor2(sig(c - 1) + sig(c + 1), 2)
};
// low(m) = x[2m] + floor((detail(m-1) + detail(m) + 2) / 4)
let low: Vec<i32> = (0..n.div_ceil(2))
.map(|m| {
let mi = isize::try_from(m).unwrap();
sig(2 * mi) + floor2(detail(mi - 1) + detail(mi) + 2, 4)
})
.collect();
let high: Vec<i32> = (0..n / 2)
.map(|m| detail(isize::try_from(m).unwrap()))
.collect();
(low, high)
}
// Reference 2D transform on a row-major plane: columns first, then rows of
// each vertical band.
fn ref_53_2d(plane: &[i32], width: usize, height: usize) -> ReversibleDwt53FirstLevel {
let low_width = width.div_ceil(2);
let high_width = width / 2;
let low_height = height.div_ceil(2);
let high_height = height / 2;
let mut v_low = vec![0i32; width * low_height];
let mut v_high = vec![0i32; width * high_height];
// Vertical pass: transform every column and scatter into the two
// full-width intermediate planes.
for x in 0..width {
let column: Vec<i32> = (0..height).map(|y| plane[y * width + x]).collect();
let (lo, hi) = ref_53_forward(&column);
for (oy, &value) in lo.iter().enumerate() {
v_low[oy * width + x] = value;
}
for (oy, &value) in hi.iter().enumerate() {
v_high[oy * width + x] = value;
}
}
// Horizontal pass over `rows` rows of a full-width intermediate plane.
let horizontal = |source: &[i32], rows: usize| -> (Vec<i32>, Vec<i32>) {
let mut low = vec![0i32; low_width * rows];
let mut high = vec![0i32; high_width * rows];
for oy in 0..rows {
let (lo, hi) = ref_53_forward(&source[oy * width..oy * width + width]);
low[oy * low_width..oy * low_width + low_width].copy_from_slice(&lo);
high[oy * high_width..oy * high_width + high_width].copy_from_slice(&hi);
}
(low, high)
};
let (ll, hl) = horizontal(&v_low, low_height);
let (lh, hh) = horizontal(&v_high, high_height);
ReversibleDwt53FirstLevel {
ll,
hl,
lh,
hh,
low_width,
low_height,
high_width,
high_height,
}
}
// Packs a row-major plane into zero-padded 8x8 blocks; returns the blocks
// together with the grid size (block_cols, block_rows).
fn pack_plane(plane: &[i32], width: usize, height: usize) -> (Vec<[i32; 64]>, usize, usize) {
let block_cols = width.div_ceil(8);
let block_rows = height.div_ceil(8);
let mut blocks = vec![[0i32; 64]; block_cols * block_rows];
for y in 0..height {
for x in 0..width {
let block = (y / 8) * block_cols + (x / 8);
blocks[block][(y % 8) * 8 + (x % 8)] = plane[y * width + x];
}
}
(blocks, block_cols, block_rows)
}
// Deterministic pseudo-random sample in -256..=255 from a 64-bit linear
// congruential step (nine bits taken from above bit 40).
fn next_sample(state: &mut u64) -> i32 {
*state = state
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
((*state >> 40) & 0x1ff) as i32 - 256
}
// The in-place lift must equal the reference for even and odd lengths.
#[test]
fn reversible_lift_53_matches_canonical_formula_1d() {
let mut state = 0x0a11_ce5e_ed00_d001u64;
for n in [2usize, 3, 4, 5, 8, 9, 12, 15, 16, 23, 32, 33, 64, 65] {
let signal: Vec<i32> = (0..n).map(|_| next_sample(&mut state)).collect();
let mut lifted = signal.clone();
reversible_lift_53_i32(&mut lifted);
let lifted_low: Vec<i32> = lifted.iter().step_by(2).copied().collect();
let lifted_high: Vec<i32> = lifted.iter().skip(1).step_by(2).copied().collect();
let (low, high) = ref_53_forward(&signal);
assert_eq!(lifted_low, low, "low band mismatch for n={n}");
assert_eq!(lifted_high, high, "high band mismatch for n={n}");
}
}
// Same check as above, but reaching the helper through its full crate path
// with a different seed and length set.
#[test]
fn reversible_lift_53_shared_helper_matches_canonical_formula_1d() {
let mut state = 0x5a53_5a53_5a53_5a53u64;
for n in [2usize, 3, 4, 5, 8, 9, 16, 17, 31, 32, 65] {
let signal: Vec<i32> = (0..n).map(|_| next_sample(&mut state)).collect();
let mut lifted = signal.clone();
crate::reversible53::reversible_lift_53_i32(&mut lifted);
let lifted_low: Vec<i32> = lifted.iter().step_by(2).copied().collect();
let lifted_high: Vec<i32> = lifted.iter().skip(1).step_by(2).copied().collect();
let (low, high) = ref_53_forward(&signal);
assert_eq!(lifted_low, low, "low band mismatch for n={n}");
assert_eq!(lifted_high, high, "high band mismatch for n={n}");
}
}
// The block-grid 2D transform must equal the separable reference, including
// sizes that are odd or not multiples of 8.
#[test]
fn reversible_dwt53_2d_matches_canonical_separable() {
let mut state = 0xfeed_5eed_d00d_face_u64;
for (width, height) in [
(8usize, 8usize),
(16, 16),
(24, 16),
(15, 13),
(16, 23),
(9, 7),
(32, 32),
] {
let plane: Vec<i32> = (0..width * height)
.map(|_| next_sample(&mut state))
.collect();
let (blocks, block_cols, block_rows) = pack_plane(&plane, width, height);
let got = reversible_dwt53_first_level_from_block_samples(
&blocks, block_cols, block_rows, width, height,
)
.expect("oracle accepts the packed grid");
let want = ref_53_2d(&plane, width, height);
assert_eq!(
(
got.low_width,
got.low_height,
got.high_width,
got.high_height
),
(
want.low_width,
want.low_height,
want.high_width,
want.high_height
),
"band dimensions for {width}x{height}"
);
assert_eq!(got.ll, want.ll, "LL mismatch for {width}x{height}");
assert_eq!(got.hl, want.hl, "HL mismatch for {width}x{height}");
assert_eq!(got.lh, want.lh, "LH mismatch for {width}x{height}");
assert_eq!(got.hh, want.hh, "HH mismatch for {width}x{height}");
}
}
// A constant signal has zero detail and an unchanged low band; a linear ramp
// has zero detail away from the boundaries.
#[test]
fn reversible_lift_53_kills_dc_and_linear_detail() {
let mut constant = vec![7i32; 32];
reversible_lift_53_i32(&mut constant);
assert!(
constant.iter().skip(1).step_by(2).all(|&v| v == 0),
"constant produced nonzero detail"
);
assert!(
constant.iter().step_by(2).all(|&v| v == 7),
"constant low band drifted from 7"
);
let ramp: Vec<i32> = (0..40_i32).map(|k| 3 * k - 5).collect();
let mut lifted = ramp;
reversible_lift_53_i32(&mut lifted);
let detail: Vec<i32> = lifted.iter().skip(1).step_by(2).copied().collect();
// Only interior coefficients are checked; the mirrored edges are excluded.
for &value in &detail[1..detail.len() - 1] {
assert_eq!(value, 0, "linear ramp produced interior detail {value}");
}
}
// A plane varying only in x must leave LH and HH empty of energy; a plane
// varying only in y must leave HL and HH empty of energy.
#[test]
fn reversible_dwt53_2d_separates_horizontal_and_vertical_detail() {
let (width, height) = (16usize, 16usize);
let varies_in_x: Vec<i32> = (0..width * height)
.map(|i| 3 * i32::try_from(i % width).unwrap() - 7)
.collect();
let (blocks, bc, br) = pack_plane(&varies_in_x, width, height);
let t = reversible_dwt53_first_level_from_block_samples(&blocks, bc, br, width, height)
.expect("oracle accepts grid");
assert!(
t.lh.iter().all(|&v| v == 0),
"x-only plane produced LH detail"
);
assert!(
t.hh.iter().all(|&v| v == 0),
"x-only plane produced HH detail"
);
let varies_in_y: Vec<i32> = (0..width * height)
.map(|i| 3 * i32::try_from(i / width).unwrap() - 7)
.collect();
let (blocks, bc, br) = pack_plane(&varies_in_y, width, height);
let t = reversible_dwt53_first_level_from_block_samples(&blocks, bc, br, width, height)
.expect("oracle accepts grid");
assert!(
t.hl.iter().all(|&v| v == 0),
"y-only plane produced HL detail"
);
assert!(
t.hh.iter().all(|&v| v == 0),
"y-only plane produced HH detail"
);
}
}