Skip to main content

j2k_transcode/
jpeg_to_htj2k.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2
3//! Experimental JPEG DCT to HTJ2K codestream transcode entry point.
4
5use core::fmt;
6use std::time::Instant;
7
8use j2k::adapter::encode_stage::{
9    CpuOnlyJ2kEncodeStageAccelerator, IrreversibleQuantizationSubbandScales,
10    J2kEncodeDispatchReport, J2kEncodeStageAccelerator, J2kForwardDwt53Level,
11    J2kForwardDwt53Output, J2kForwardDwt97Level, J2kForwardDwt97Output, NativeEncodeStageAdapter,
12    PrecomputedHtj2k53Component, PrecomputedHtj2k53Image, PrecomputedHtj2k97Component,
13    PrecomputedHtj2k97Image, PreencodedHtj2k97CompactComponent, PreencodedHtj2k97CompactImage,
14    PreencodedHtj2k97Component, PreencodedHtj2k97Image, PrequantizedHtj2k97Component,
15    PrequantizedHtj2k97Image,
16};
17use j2k::J2kProgressionOrder;
18use j2k_jpeg::transcode::{
19    extract_dct_blocks, idct_islow_block, DctExtractOptions, JpegDctComponent, JpegDctImage,
20};
21use j2k_native::{
22    encode_precomputed_htj2k_53_with_accelerator,
23    encode_precomputed_htj2k_97_batch_with_accelerator,
24    encode_precomputed_htj2k_97_with_accelerator,
25    encode_preencoded_htj2k_97_compact_owned_with_accelerator,
26    encode_preencoded_htj2k_97_owned_with_accelerator,
27    encode_prequantized_htj2k_97_with_accelerator,
28};
29use rayon::prelude::*;
30
31use crate::accelerator::{
32    CpuOnlyDctToWaveletStageAccelerator, DctGridI16ToHtj2k97CodeBlockBatch,
33    DctGridI16ToHtj2k97CodeBlockJob, DctGridToDwt53Job, DctGridToDwt97Job,
34    DctGridToHtj2k97CodeBlockJob, DctGridToReversibleDwt53Job, DctToWaveletStageAccelerator,
35    Dwt97BatchStageTimings, Htj2k97CodeBlockOptions, ReversibleDwt53FirstLevel,
36    TranscodeStageError,
37};
38use crate::dct53_2d::{
39    dct8x8_blocks_then_dwt53_float, dct8x8_blocks_to_dwt53_float_linear_with_scratch,
40    linearized_53_2d_from_plane, Dct53GridScratch, Dwt53TwoDimensional,
41};
42use crate::dct97_2d::{
43    dct8x8_blocks_then_dwt97_float, dct8x8_blocks_then_dwt97_float_with_scratch,
44    linearized_97_2d_from_plane_with_scratch, Dct97GridScratch, Dwt97TwoDimensional,
45};
46use crate::metrics::{error_metrics_i32, ErrorMetrics, MetricsLengthError};
47use crate::reversible53::{
48    reversible_lift_53_high_at_fallible, reversible_lift_53_i32, reversible_lift_53_low_at_fallible,
49};
50use crate::DctGridError;
51
/// Default irreversible quantization multiplier for JPEG direct 9/7 HTJ2K.
///
/// The value was chosen empirically so that the explicit lossy comparison
/// profile is rate-matched to roughly the external comparator's output size
/// on the bundled WSI tiles. Lower values produce larger, higher-quality
/// codestreams; `1.0` matches the native encoder default but overshoots the
/// external baseline size for this transcode path.
pub const JPEG_TO_HTJ2K_LOSSY_97_QUANTIZATION_SCALE: f32 = 1.9;
59
/// HTJ2K encode options used after JPEG coefficient-domain wavelet bands are produced.
///
/// This crate-level mirror of `j2k_native::EncodeOptions` is converted to the
/// native type right before encoding; every field here is forwarded by that
/// conversion.
#[derive(Debug, Clone, PartialEq)]
// The boolean fields are independent encoder switches that are forwarded
// one-to-one to the native encoder options, so they are kept as plain bools.
#[allow(clippy::struct_excessive_bools)]
pub struct JpegToHtj2kEncodeOptions {
    /// Number of wavelet decomposition levels.
    pub num_decomposition_levels: u8,
    /// Whether to emit reversible/lossless coding.
    pub reversible: bool,
    /// Code-block width exponent minus two (the stored value is the exponent
    /// with two subtracted).
    pub code_block_width_exp: u8,
    /// Code-block height exponent minus two (the stored value is the exponent
    /// with two subtracted).
    pub code_block_height_exp: u8,
    /// JPEG 2000 guard bits.
    pub guard_bits: u8,
    /// Whether to encode HTJ2K code blocks instead of classic EBCOT.
    pub use_ht_block_coding: bool,
    /// Packet progression order.
    pub progression_order: J2kProgressionOrder,
    /// Whether to write a TLM marker segment.
    pub write_tlm: bool,
    /// Whether to write PLT packet-length marker segments.
    pub write_plt: bool,
    /// Whether to write PLM packet-length marker segments.
    pub write_plm: bool,
    /// Whether to write PPM packed packet-header marker segments.
    pub write_ppm: bool,
    /// Whether to write PPT packed packet-header marker segments.
    pub write_ppt: bool,
    /// Whether to write SOP marker segments before packets.
    pub write_sop: bool,
    /// Whether to write EPH markers after packet headers.
    pub write_eph: bool,
    /// Whether to apply the JPEG 2000 multi-component transform.
    pub use_mct: bool,
    /// Number of cumulative quality layers.
    pub num_layers: u8,
    /// Optional cumulative packet-body byte targets for each quality layer.
    /// An empty vector means no targets are supplied.
    pub quality_layer_byte_targets: Vec<u64>,
    /// Whether native HTJ2K validation is enabled after encode.
    pub validate_high_throughput_codestream: bool,
    /// Global irreversible 9/7 quantization scale.
    pub irreversible_quantization_scale: f32,
    /// Per-subband irreversible 9/7 quantization scales.
    pub irreversible_quantization_subband_scales: IrreversibleQuantizationSubbandScales,
    /// Optional per-component SIZ sampling factors (`XRsiz`, `YRsiz`).
    pub component_sampling: Option<Vec<(u8, u8)>>,
    /// Optional tile size for multi-tile codestreams.
    pub tile_size: Option<(u32, u32)>,
    /// Optional maximum number of complete packets to place in each tile-part.
    pub tile_part_packet_limit: Option<u16>,
    /// Optional precinct exponents in COD order. An empty vector means none
    /// are supplied.
    pub precinct_exponents: Vec<(u8, u8)>,
}
113
114impl Default for JpegToHtj2kEncodeOptions {
115    fn default() -> Self {
116        Self {
117            num_decomposition_levels: 5,
118            reversible: true,
119            code_block_width_exp: 4,
120            code_block_height_exp: 4,
121            guard_bits: 1,
122            use_ht_block_coding: false,
123            progression_order: J2kProgressionOrder::Lrcp,
124            write_tlm: false,
125            write_plt: false,
126            write_plm: false,
127            write_ppm: false,
128            write_ppt: false,
129            write_sop: false,
130            write_eph: false,
131            use_mct: true,
132            num_layers: 1,
133            quality_layer_byte_targets: Vec::new(),
134            validate_high_throughput_codestream: true,
135            irreversible_quantization_scale: 1.0,
136            irreversible_quantization_subband_scales:
137                IrreversibleQuantizationSubbandScales::default(),
138            component_sampling: None,
139            tile_size: None,
140            tile_part_packet_limit: None,
141            precinct_exponents: Vec::new(),
142        }
143    }
144}
145
146impl JpegToHtj2kEncodeOptions {
147    fn to_native(&self) -> j2k_native::EncodeOptions {
148        j2k_native::EncodeOptions {
149            num_decomposition_levels: self.num_decomposition_levels,
150            reversible: self.reversible,
151            code_block_width_exp: self.code_block_width_exp,
152            code_block_height_exp: self.code_block_height_exp,
153            guard_bits: self.guard_bits,
154            use_ht_block_coding: self.use_ht_block_coding,
155            progression_order: native_progression_order(self.progression_order),
156            write_tlm: self.write_tlm,
157            write_plt: self.write_plt,
158            write_plm: self.write_plm,
159            write_ppm: self.write_ppm,
160            write_ppt: self.write_ppt,
161            write_sop: self.write_sop,
162            write_eph: self.write_eph,
163            use_mct: self.use_mct,
164            num_layers: self.num_layers,
165            quality_layer_byte_targets: self.quality_layer_byte_targets.clone(),
166            validate_high_throughput_codestream: self.validate_high_throughput_codestream,
167            irreversible_quantization_scale: self.irreversible_quantization_scale,
168            irreversible_quantization_subband_scales: self.irreversible_quantization_subband_scales,
169            component_sampling: self.component_sampling.clone(),
170            tile_size: self.tile_size,
171            tile_part_packet_limit: self.tile_part_packet_limit,
172            precinct_exponents: self.precinct_exponents.clone(),
173            roi_component_shifts: Vec::new(),
174        }
175    }
176}
177
/// Options for the experimental JPEG-to-HTJ2K path.
///
/// Use [`JpegToHtj2kOptions::lossless_53`] or [`JpegToHtj2kOptions::lossy_97`]
/// for the two preset configurations; both presets leave the validation
/// oracles disabled.
#[derive(Debug, Clone)]
pub struct JpegToHtj2kOptions {
    /// HTJ2K encode options used after wavelet bands are produced.
    pub encode_options: JpegToHtj2kEncodeOptions,
    /// Coefficient production path used for HTJ2K precomputed bands.
    pub coefficient_path: JpegToHtj2kCoefficientPath,
    /// Materialize the float IDCT-then-DWT oracle and report rounded
    /// coefficient differences. This is intended for validation and tests, not
    /// the production direct path.
    pub validate_against_float_reference: bool,
    /// Materialize j2k-jpeg scalar ISLOW samples and report reversible
    /// integer 5/3 coefficient differences against the rounded direct path.
    /// This is intended for validation and tests, not the production direct
    /// path.
    pub validate_against_integer_reference: bool,
}
195
impl Default for JpegToHtj2kOptions {
    /// The default is the reversible 5/3 preset returned by
    /// [`JpegToHtj2kOptions::lossless_53`].
    fn default() -> Self {
        Self::lossless_53()
    }
}
201
202impl JpegToHtj2kOptions {
203    /// Options for the default reversible 5/3 HTJ2K coefficient path.
204    #[must_use]
205    pub fn lossless_53() -> Self {
206        Self {
207            encode_options: transcode_encode_options(true),
208            coefficient_path: JpegToHtj2kCoefficientPath::IntegerDirect53,
209            validate_against_float_reference: false,
210            validate_against_integer_reference: false,
211        }
212    }
213
214    /// Options for the irreversible 9/7 HTJ2K float-linear coefficient path.
215    #[must_use]
216    pub fn lossy_97() -> Self {
217        let mut encode_options = transcode_encode_options(false);
218        encode_options.irreversible_quantization_scale = JPEG_TO_HTJ2K_LOSSY_97_QUANTIZATION_SCALE;
219        Self {
220            encode_options,
221            coefficient_path: JpegToHtj2kCoefficientPath::FloatDirectLinear97,
222            validate_against_float_reference: false,
223            validate_against_integer_reference: false,
224        }
225    }
226}
227
228fn transcode_encode_options(reversible: bool) -> JpegToHtj2kEncodeOptions {
229    JpegToHtj2kEncodeOptions {
230        num_decomposition_levels: 1,
231        reversible,
232        use_ht_block_coding: true,
233        use_mct: false,
234        validate_high_throughput_codestream: false,
235        ..JpegToHtj2kEncodeOptions::default()
236    }
237}
238
/// Experimental production path used to generate HTJ2K wavelet coefficients.
///
/// The two `...53` variants target the 5/3 wavelet; the `...97` variant
/// targets the irreversible 9/7 wavelet.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JpegToHtj2kCoefficientPath {
    /// Exact reversible 5/3 coefficients relative to `j2k-jpeg` scalar
    /// ISLOW block decode semantics. The first 5/3 level is computed from DCT
    /// blocks without materializing a full spatial image plane; later levels
    /// recurse conventionally over the LL coefficient band.
    IntegerDirect53,
    /// Floating-point linear composition of IDCT and 5/3 analysis. This is the
    /// linear math oracle path and remains useful for validating the direct
    /// matrix composition, but it is not the integer reversible production
    /// default.
    FloatDirectLinear53,
    /// Floating-point linear composition of IDCT and irreversible 9/7
    /// analysis. This is a lossy experimental path and must be paired with an
    /// irreversible HTJ2K encode.
    FloatDirectLinear97,
}
257
/// Reusable experimental JPEG-to-HTJ2K transcoder state.
///
/// Create one value per worker thread and reuse it across many tiles to keep
/// scratch buffers allocated between calls. The scalar math and output are the
/// same as [`jpeg_to_htj2k`].
#[derive(Debug, Default)]
pub struct JpegToHtj2kTranscoder {
    /// Scratch buffers lent to every transcode call made through this value.
    scratch: JpegToHtj2kScratch,
}
267
268impl JpegToHtj2kTranscoder {
269    /// Transcode a constrained baseline JPEG tile into HTJ2K using this
270    /// instance's reusable scratch buffers.
271    pub fn transcode(
272        &mut self,
273        bytes: &[u8],
274        options: &JpegToHtj2kOptions,
275    ) -> Result<EncodedTranscode, JpegToHtj2kError> {
276        let mut accelerator = CpuOnlyDctToWaveletStageAccelerator;
277        self.transcode_with_accelerator(bytes, options, &mut accelerator)
278    }
279
280    /// Transcode with an optional stage accelerator.
281    ///
282    /// Accelerators may handle direct DCT-grid projection stages and return
283    /// `None` for scalar fallback. Integer-direct 5/3 is offered in
284    /// same-geometry batches before falling back to per-component work.
285    pub fn transcode_with_accelerator<A: DctToWaveletStageAccelerator>(
286        &mut self,
287        bytes: &[u8],
288        options: &JpegToHtj2kOptions,
289        accelerator: &mut A,
290    ) -> Result<EncodedTranscode, JpegToHtj2kError> {
291        let mut encode_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
292        self.transcode_with_accelerators(bytes, options, accelerator, &mut encode_accelerator)
293    }
294
295    /// Transcode with separate transform-stage and HTJ2K encode-stage
296    /// accelerators.
297    pub fn transcode_with_accelerators<
298        A: DctToWaveletStageAccelerator,
299        E: J2kEncodeStageAccelerator,
300    >(
301        &mut self,
302        bytes: &[u8],
303        options: &JpegToHtj2kOptions,
304        transform_accelerator: &mut A,
305        encode_accelerator: &mut E,
306    ) -> Result<EncodedTranscode, JpegToHtj2kError> {
307        jpeg_to_htj2k_with_scratch(
308            bytes,
309            options,
310            &mut self.scratch,
311            transform_accelerator,
312            encode_accelerator,
313        )
314    }
315
316    /// Transcode many JPEG tiles, preserving per-tile failures in the returned
317    /// batch. Integer-direct 5/3 groups same-geometry components across tiles
318    /// before calling the accelerator.
319    pub fn transcode_batch(
320        &mut self,
321        tiles: &[JpegTileBatchInput<'_>],
322        options: &JpegToHtj2kOptions,
323    ) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
324        let mut accelerator = CpuOnlyDctToWaveletStageAccelerator;
325        self.transcode_batch_with_accelerator(tiles, options, &mut accelerator)
326    }
327
328    /// Transcode many JPEG tiles with an optional stage accelerator.
329    pub fn transcode_batch_with_accelerator<A: DctToWaveletStageAccelerator>(
330        &mut self,
331        tiles: &[JpegTileBatchInput<'_>],
332        options: &JpegToHtj2kOptions,
333        accelerator: &mut A,
334    ) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
335        let mut encode_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
336        self.transcode_batch_with_accelerators(tiles, options, accelerator, &mut encode_accelerator)
337    }
338
339    /// Transcode many JPEG tiles with separate transform-stage and HTJ2K
340    /// encode-stage accelerators.
341    pub fn transcode_batch_with_accelerators<
342        A: DctToWaveletStageAccelerator,
343        E: J2kEncodeStageAccelerator,
344    >(
345        &mut self,
346        tiles: &[JpegTileBatchInput<'_>],
347        options: &JpegToHtj2kOptions,
348        transform_accelerator: &mut A,
349        encode_accelerator: &mut E,
350    ) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
351        jpeg_tile_batch_to_htj2k_with_scratch(
352            tiles,
353            options,
354            &mut self.scratch,
355            transform_accelerator,
356            encode_accelerator,
357        )
358    }
359
360    /// Current capacity of the reusable DCT block conversion scratch.
361    ///
362    /// This is exposed for benchmark and validation harnesses while the API is
363    /// experimental.
364    #[must_use]
365    pub fn dct_block_scratch_capacity(&self) -> usize {
366        self.scratch.dct_blocks_f64.capacity()
367    }
368
369    /// Current capacity of the reusable integer block-local IDCT sample cache.
370    ///
371    /// This cache stores level-shifted 8x8 block samples for the integer-direct
372    /// path. It is block-local scratch, not a full spatial image plane.
373    #[must_use]
374    pub fn integer_idct_block_scratch_capacity(&self) -> usize {
375        self.scratch.integer_idct_blocks.capacity()
376    }
377}
378
/// Reusable buffers owned by [`JpegToHtj2kTranscoder`] and passed by mutable
/// reference into the scratch-taking transcode entry points, so allocations
/// can persist between calls.
#[derive(Debug, Default)]
struct JpegToHtj2kScratch {
    /// DCT block conversion scratch holding 8x8 blocks of `f64` values.
    dct_blocks_f64: Vec<[[f64; 8]; 8]>,
    /// Scratch for the 5/3 DCT-grid transform.
    /// NOTE(review): presumably consumed by the `dct53_2d` helpers — confirm.
    dct53_grid: Dct53GridScratch,
    /// Scratch for the 9/7 DCT-grid transform.
    /// NOTE(review): presumably consumed by the `dct97_2d` helpers — confirm.
    dct97_grid: Dct97GridScratch,
    /// Block-local IDCT sample cache for the integer-direct path; each entry
    /// is an optional 64-sample block.
    integer_idct_blocks: Vec<Option<[i32; 64]>>,
    /// Reusable integer row buffer.
    /// NOTE(review): its use is outside this view — confirm before relying on it.
    integer_row: Vec<i32>,
}
387
/// Encoded transcode output for one tile, together with validation and
/// reporting metadata.
#[derive(Debug, Clone)]
pub struct EncodedTranscode {
    /// HTJ2K codestream bytes.
    pub codestream: Vec<u8>,
    /// Summary of the experimental path used, including timings and any
    /// optional validation metrics.
    pub report: TranscodeReport,
}
396
/// One JPEG tile input for batch transcode.
///
/// The tile borrows its bytes, so the value is cheap to copy.
#[derive(Debug, Clone, Copy)]
pub struct JpegTileBatchInput<'a> {
    /// JPEG codestream bytes for one tile.
    pub bytes: &'a [u8],
}
403
/// Batch transcode output. Tile-level parse/encode failures are preserved so a
/// WSI ingest queue can continue past isolated bad tiles.
#[derive(Debug)]
pub struct EncodedTranscodeBatch {
    /// Per-input tile result in input order; a failed tile holds its own
    /// error instead of aborting the batch.
    pub tiles: Vec<Result<EncodedTranscode, JpegToHtj2kError>>,
    /// Aggregate batch report.
    pub report: BatchTranscodeReport,
}
413
/// Aggregate report for multi-tile transcode.
///
/// All `*_us` fields are durations in microseconds.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BatchTranscodeReport {
    /// Number of input tiles.
    pub tile_count: usize,
    /// Number of successfully encoded output tiles.
    pub successful_tiles: usize,
    /// Number of tile-local failures.
    pub failed_tiles: usize,
    /// Number of transformed components across successful extracted tiles.
    pub transformed_components: usize,
    /// Number of same-geometry reversible 5/3 batches submitted.
    pub reversible_dwt53_batches: usize,
    /// Number of reversible 5/3 component jobs in submitted batches.
    pub reversible_dwt53_batch_jobs: usize,
    /// Batch extraction time in microseconds.
    pub extract_us: u128,
    /// Batch DCT-to-wavelet time in microseconds.
    pub transform_us: u128,
    /// Batch HTJ2K encode time in microseconds.
    pub encode_us: u128,
    /// Detailed stage timings for the batch. Batch-accelerated 5/3 transform
    /// timings stay here instead of being copied into every tile report.
    pub timings: TranscodeTimingReport,
    /// Coefficient path used by the batch.
    pub coefficient_path: JpegToHtj2kCoefficientPath,
}
441
/// Detailed timing and dispatch counters for JPEG-to-HTJ2K transcode.
///
/// Durations are wall-clock microseconds measured around the current Rust API
/// boundaries. Accelerator time includes backend submission and wait overhead
/// visible to this crate; backend-specific hardware counters are not exposed
/// here.
///
/// Every field is an additive counter: reports are combined by summing each
/// field with saturation.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct TranscodeTimingReport {
    /// Raw compressed-tile probe/read time before JPEG DCT extraction.
    pub source_raw_probe_us: u128,
    /// Source region decode time for strip/retile workflows.
    pub read_region_decode_us: u128,
    /// Region compose/pad time for generated regular tiles.
    pub compose_pad_us: u128,
    /// JPEG encode time when the workflow generates regular JPEG tiles.
    pub generated_jpeg_encode_us: u128,
    /// JPEG DCT extraction time in microseconds.
    pub jpeg_dct_extract_us: u128,
    /// Time spent repacking integer DCT coefficients into float block grids.
    pub jpeg_dct_repack_us: u128,
    /// Total wall time spent producing DWT bands from JPEG DCT coefficients.
    pub dct_to_wavelet_total_us: u128,
    /// Wall time spent inside accelerator hook calls.
    pub dct_to_wavelet_accelerator_us: u128,
    /// Wall time spent in scalar CPU fallback transforms.
    pub dct_to_wavelet_cpu_fallback_us: u128,
    /// Time spent decomposing first-level DWT output into requested levels.
    pub dwt_decompose_us: u128,
    /// Backend 9/7 batch host pack/upload time in microseconds.
    pub dwt97_batch_pack_upload_us: u128,
    /// Logical host-to-device transfers during backend 9/7 batch pack/upload.
    pub dwt97_batch_pack_upload_transfers: usize,
    /// Host-to-device bytes during backend 9/7 batch pack/upload.
    pub dwt97_batch_pack_upload_bytes: u64,
    /// Resident JPEG DCT-grid descriptors validated during backend 9/7 batches.
    pub dwt97_batch_resident_dct_handoff_count: usize,
    /// Backend 9/7 batch IDCT plus horizontal row-lift time in microseconds.
    pub dwt97_batch_idct_row_lift_us: u128,
    /// Backend 9/7 batch vertical column-lift time in microseconds.
    pub dwt97_batch_column_lift_us: u128,
    /// Resident DWT subband descriptors validated during backend 9/7 batches.
    pub dwt97_batch_resident_dwt_handoff_count: usize,
    /// Backend 9/7 batch quantize/code-block layout time in microseconds.
    pub dwt97_batch_quantize_codeblock_us: u128,
    /// Backend 9/7 resident HT code-block encode time in microseconds.
    pub dwt97_batch_ht_encode_us: u128,
    /// Backend 9/7 resident HT cleanup-pass encode kernel time in microseconds.
    pub dwt97_batch_ht_kernel_us: u128,
    /// Backend 9/7 resident HT status-buffer device-to-host readback time in microseconds.
    pub dwt97_batch_ht_status_readback_us: u128,
    /// Logical device-to-host status readbacks after resident HT encode.
    pub dwt97_batch_ht_status_readback_transfers: usize,
    /// Device-to-host status bytes after resident HT encode.
    pub dwt97_batch_ht_status_readback_bytes: u64,
    /// Backend 9/7 resident HT encoded-byte compaction kernel time in microseconds.
    pub dwt97_batch_ht_compact_us: u128,
    /// Backend 9/7 resident HT compacted encoded-byte device-to-host readback time in microseconds.
    pub dwt97_batch_ht_output_readback_us: u128,
    /// Logical device-to-host output readbacks after resident HT compaction.
    pub dwt97_batch_ht_output_readback_transfers: usize,
    /// Device-to-host output bytes after resident HT compaction.
    pub dwt97_batch_ht_output_readback_bytes: u64,
    /// Backend 9/7 resident HT code-block encode dispatches.
    pub dwt97_batch_ht_codeblock_dispatches: usize,
    /// Backend 9/7 batch output readback/unpack time in microseconds.
    pub dwt97_batch_readback_us: u128,
    /// Logical device-to-host transfers during backend 9/7 batch output readback.
    pub dwt97_batch_readback_transfers: usize,
    /// Device-to-host bytes during backend 9/7 batch output readback.
    pub dwt97_batch_readback_bytes: u64,
    /// HTJ2K encode time in microseconds.
    pub htj2k_encode_us: u128,
    /// Encode-stage accelerator dispatches during HTJ2K encode.
    pub htj2k_encode_accelerator_dispatches: usize,
    /// HT cleanup code-block accelerator dispatches during HTJ2K encode.
    pub htj2k_encode_ht_code_block_dispatches: usize,
    /// Packetization accelerator dispatches during HTJ2K encode.
    pub htj2k_encode_packetization_dispatches: usize,
    /// Time spent writing compressed frames to a DICOM `PixelData` spool.
    pub dicom_spool_write_us: u128,
    /// Time spent writing final DICOM instances.
    pub dicom_final_write_us: u128,
    /// Number of source tiles represented by this timing report.
    pub tile_count: usize,
    /// Number of components transformed into wavelet bands.
    pub component_count: usize,
    /// Number of same-geometry transform batches offered to the accelerator.
    pub batch_count: usize,
    /// Number of component jobs in same-geometry transform batches.
    pub batch_jobs: usize,
    /// Number of accelerator hook calls.
    pub accelerator_attempts: usize,
    /// Number of component jobs offered through accelerator hook calls.
    pub accelerator_jobs: usize,
    /// Number of accelerator hook calls that returned an accelerated result.
    pub accelerator_dispatches: usize,
    /// Number of component jobs completed by accelerated results.
    pub accelerator_dispatched_jobs: usize,
    /// Number of component jobs completed by scalar CPU fallback transforms.
    pub cpu_fallback_jobs: usize,
}

impl TranscodeTimingReport {
    /// Adds every counter of `other` into `self`, saturating at each field's
    /// maximum instead of overflowing.
    ///
    /// The field list below must name every field of the struct: `other` is
    /// destructured without a `..` rest pattern, so adding a field to
    /// [`TranscodeTimingReport`] without listing it here is a compile error
    /// rather than a silently dropped counter.
    fn add_assign(&mut self, other: Self) {
        macro_rules! saturating_add_fields {
            ($($field:ident),+ $(,)?) => {
                // Exhaustive destructuring is the compile-time completeness check.
                let Self { $($field),+ } = other;
                $(
                    self.$field = self.$field.saturating_add($field);
                )+
            };
        }

        saturating_add_fields!(
            source_raw_probe_us,
            read_region_decode_us,
            compose_pad_us,
            generated_jpeg_encode_us,
            jpeg_dct_extract_us,
            jpeg_dct_repack_us,
            dct_to_wavelet_total_us,
            dct_to_wavelet_accelerator_us,
            dct_to_wavelet_cpu_fallback_us,
            dwt_decompose_us,
            dwt97_batch_pack_upload_us,
            dwt97_batch_pack_upload_transfers,
            dwt97_batch_pack_upload_bytes,
            dwt97_batch_resident_dct_handoff_count,
            dwt97_batch_idct_row_lift_us,
            dwt97_batch_column_lift_us,
            dwt97_batch_resident_dwt_handoff_count,
            dwt97_batch_quantize_codeblock_us,
            dwt97_batch_ht_encode_us,
            dwt97_batch_ht_kernel_us,
            dwt97_batch_ht_status_readback_us,
            dwt97_batch_ht_status_readback_transfers,
            dwt97_batch_ht_status_readback_bytes,
            dwt97_batch_ht_compact_us,
            dwt97_batch_ht_output_readback_us,
            dwt97_batch_ht_output_readback_transfers,
            dwt97_batch_ht_output_readback_bytes,
            dwt97_batch_ht_codeblock_dispatches,
            dwt97_batch_readback_us,
            dwt97_batch_readback_transfers,
            dwt97_batch_readback_bytes,
            htj2k_encode_us,
            htj2k_encode_accelerator_dispatches,
            htj2k_encode_ht_code_block_dispatches,
            htj2k_encode_packetization_dispatches,
            dicom_spool_write_us,
            dicom_final_write_us,
            tile_count,
            component_count,
            batch_count,
            batch_jobs,
            accelerator_attempts,
            accelerator_jobs,
            accelerator_dispatches,
            accelerator_dispatched_jobs,
            cpu_fallback_jobs,
        );
    }
}
604
/// Per-component transcode geometry preserved in the generated codestream.
///
/// One value describes one JPEG component: its native sample dimensions, its
/// DCT block grid, and the SIZ sampling factors written for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscodeComponentReport {
    /// Component index in JPEG SOF order.
    pub component_index: usize,
    /// Native component width in samples before HTJ2K SIZ expansion.
    pub width: u32,
    /// Native component height in samples before HTJ2K SIZ expansion.
    pub height: u32,
    /// Number of DCT blocks per component row, including padded edge blocks.
    pub block_cols: u32,
    /// Number of DCT block rows, including padded edge blocks.
    pub block_rows: u32,
    /// HTJ2K SIZ horizontal sampling factor.
    pub x_rsiz: u8,
    /// HTJ2K SIZ vertical sampling factor.
    pub y_rsiz: u8,
}
623
/// Error metrics from an optional validation oracle.
///
/// This is an alias for the crate's [`ErrorMetrics`] type.
pub type TranscodeValidationMetrics = ErrorMetrics;
626
/// Classification for optional coefficient-validation metrics.
///
/// Variants are listed from strictest to loosest; see
/// [`TranscodeValidationClassification::classify_metrics`] for how metrics are
/// mapped onto them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TranscodeValidationClassification {
    /// All compared coefficients match the selected oracle exactly.
    Exact,
    /// Coefficients satisfy the experimental one-LSB-bounded threshold:
    /// maximum absolute error is at most one LSB and at least 99.9% of
    /// coefficients match exactly.
    OneLsbBounded,
    /// Coefficients do not satisfy the exact or one-LSB-bounded thresholds.
    OutsideThreshold,
}
639
640impl TranscodeValidationClassification {
641    /// Classify validation metrics using the experimental acceptance
642    /// thresholds documented for this coefficient-domain path.
643    #[must_use]
644    pub fn classify_metrics(metrics: &TranscodeValidationMetrics) -> Self {
645        if metrics.exact_matches == metrics.total && metrics.max_abs_error == 0 {
646            Self::Exact
647        } else if metrics.is_one_lsb_bounded(0.999) {
648            Self::OneLsbBounded
649        } else {
650            Self::OutsideThreshold
651        }
652    }
653}
654
/// Transcode summary for validation and benchmarking.
///
/// The optional `*_reference_*` fields are `Option`s so a report can omit
/// them; each classification field is the threshold classification of the
/// metrics field with the same prefix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscodeReport {
    /// Source reference-grid width.
    pub width: u32,
    /// Source reference-grid height.
    pub height: u32,
    /// Number of transformed components.
    pub component_count: usize,
    /// Native transformed component geometry and SIZ sampling.
    pub components: Vec<TranscodeComponentReport>,
    /// Rounded coefficient metrics against the optional float IDCT-then-DWT
    /// oracle.
    pub float_reference_metrics: Option<TranscodeValidationMetrics>,
    /// Threshold classification for `float_reference_metrics`.
    pub float_reference_classification: Option<TranscodeValidationClassification>,
    /// Rounded direct coefficients compared with j2k-jpeg scalar
    /// ISLOW-IDCT-then-reversible-5/3 coefficients.
    pub integer_reference_metrics: Option<TranscodeValidationMetrics>,
    /// Threshold classification for `integer_reference_metrics`.
    pub integer_reference_classification: Option<TranscodeValidationClassification>,
    /// Number of DWT decomposition levels encoded.
    pub decomposition_levels: u8,
    /// Coefficient path used to generate the HTJ2K bands.
    pub coefficient_path: JpegToHtj2kCoefficientPath,
    /// Name of the experimental path used.
    pub path: &'static str,
    /// Wall-clock extraction time in microseconds.
    pub extract_us: u128,
    /// Wall-clock DCT-to-wavelet time in microseconds.
    pub transform_us: u128,
    /// Wall-clock HTJ2K encode time in microseconds.
    pub encode_us: u128,
    /// Detailed stage timings and accelerator/fallback counters.
    pub timings: TranscodeTimingReport,
}
691
/// Error returned by the experimental transcode path.
///
/// Each variant identifies the stage that failed. Only [`Self::Jpeg`] is
/// reported through `std::error::Error::source`; the other variants carry
/// their detail as a message or as a value that is only formatted.
#[derive(Debug)]
pub enum JpegToHtj2kError {
    /// JPEG parse or entropy decode failed.
    Jpeg(j2k_jpeg::JpegError),
    /// Input is outside the currently implemented experimental slice.
    Unsupported(&'static str),
    /// DCT block grid metadata did not cover the component dimensions.
    Grid(String),
    /// DCT block grid metadata did not cover the component dimensions for the
    /// 9/7 path.
    Grid97(String),
    /// Optional transform acceleration failed.
    Accelerator(TranscodeStageError),
    /// Validation metric inputs were inconsistent.
    Metrics(String),
    /// Validation encountered an out-of-range or non-finite coefficient.
    Validation(&'static str),
    /// Native HTJ2K encode failed.
    Encode(&'static str),
}
713
714impl fmt::Display for JpegToHtj2kError {
715    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
716        match self {
717            Self::Jpeg(err) => write!(f, "JPEG extraction failed: {err}"),
718            Self::Unsupported(reason) => write!(f, "unsupported transcode input: {reason}"),
719            Self::Grid(reason) | Self::Grid97(reason) => {
720                write!(f, "DCT grid transform failed: {reason}")
721            }
722            Self::Accelerator(reason) => write!(f, "transform accelerator failed: {reason}"),
723            Self::Metrics(reason) => write!(f, "validation metrics failed: {reason}"),
724            Self::Validation(reason) => write!(f, "validation failed: {reason}"),
725            Self::Encode(reason) => write!(f, "HTJ2K encode failed: {reason}"),
726        }
727    }
728}
729
impl std::error::Error for JpegToHtj2kError {
    /// Returns the wrapped JPEG error for [`JpegToHtj2kError::Jpeg`] and
    /// `None` for every other variant.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Jpeg(err) => Some(err),
            // Every remaining variant is listed explicitly (no `_` arm), so
            // adding a variant to the enum forces a decision here.
            // NOTE(review): `Accelerator` wraps a `TranscodeStageError`; if that
            // type implements `std::error::Error` it could be exposed as a
            // source as well — confirm before changing.
            Self::Unsupported(_)
            | Self::Grid(_)
            | Self::Grid97(_)
            | Self::Accelerator(_)
            | Self::Metrics(_)
            | Self::Validation(_)
            | Self::Encode(_) => None,
        }
    }
}
744
/// Wraps a JPEG extraction error in [`JpegToHtj2kError::Jpeg`], which lets
/// `?` be used directly on results carrying `j2k_jpeg::JpegError`.
impl From<j2k_jpeg::JpegError> for JpegToHtj2kError {
    fn from(value: j2k_jpeg::JpegError) -> Self {
        Self::Jpeg(value)
    }
}
750
/// Converts a grid error into [`JpegToHtj2kError::Grid`], keeping only its
/// `to_string()` text.
fn dct53_grid_error(value: DctGridError) -> JpegToHtj2kError {
    JpegToHtj2kError::Grid(value.to_string())
}
754
/// Converts a grid error into [`JpegToHtj2kError::Grid97`], keeping only its
/// `to_string()` text.
fn dct97_grid_error(value: DctGridError) -> JpegToHtj2kError {
    JpegToHtj2kError::Grid97(value.to_string())
}
758
/// Maps a metrics length mismatch to [`JpegToHtj2kError::Metrics`], keeping
/// only the error's `to_string()` text.
impl From<MetricsLengthError> for JpegToHtj2kError {
    fn from(value: MetricsLengthError) -> Self {
        Self::Metrics(value.to_string())
    }
}
764
/// Transcode a constrained baseline JPEG tile into an HTJ2K codestream using
/// direct DCT-domain wavelet coefficients.
///
/// Current implementation scope is baseline JPEG with one or more components
/// at native JPEG component resolution. Component subsampling is preserved
/// through SIZ `XRsiz`/`YRsiz` instead of chroma upsampling.
///
/// This is a convenience wrapper that runs
/// `JpegToHtj2kTranscoder::default().transcode(bytes, options)`.
///
/// # Errors
///
/// Returns the [`JpegToHtj2kError`] produced by the default transcoder.
pub fn jpeg_to_htj2k(
    bytes: &[u8],
    options: &JpegToHtj2kOptions,
) -> Result<EncodedTranscode, JpegToHtj2kError> {
    JpegToHtj2kTranscoder::default().transcode(bytes, options)
}
777
/// Transcode many JPEG tiles into HTJ2K codestreams.
///
/// This is a convenience wrapper that runs
/// `JpegToHtj2kTranscoder::default().transcode_batch(tiles, options)`. All
/// tiles share the same `options`.
///
/// # Errors
///
/// Returns the [`JpegToHtj2kError`] produced by the default transcoder's
/// batch entry point.
// NOTE(review): the returned batch stores one `Result` per tile, so the outer
// `Err` presumably signals a batch-wide failure rather than a single bad tile
// — confirm against `transcode_batch`.
pub fn jpeg_to_htj2k_batch(
    tiles: &[JpegTileBatchInput<'_>],
    options: &JpegToHtj2kOptions,
) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
    JpegToHtj2kTranscoder::default().transcode_batch(tiles, options)
}
785
/// Batch entry point that dispatches on `options.coefficient_path`.
///
/// * `IntegerDirect53` runs the batched reversible 5/3 pipeline implemented in
///   the body of this function (extract, transform, encode).
/// * `FloatDirectLinear97` is forwarded to
///   `jpeg_float97_tile_batch_to_htj2k_with_scratch` when the accelerator
///   reports DWT 9/7 batch or HTJ2K 9/7 code-block batch support.
/// * Every other float path is transcoded tile by tile through
///   `transcode_tile_batch_individually`.
///
/// # Errors
///
/// Returns `Err` when option validation fails or when the batched transform
/// stage fails; in both cases no per-tile results are returned. Failures that
/// are specific to one tile (extraction or encode) are stored in that tile's
/// slot of the returned batch instead.
fn jpeg_tile_batch_to_htj2k_with_scratch<
    A: DctToWaveletStageAccelerator,
    E: J2kEncodeStageAccelerator,
>(
    tiles: &[JpegTileBatchInput<'_>],
    options: &JpegToHtj2kOptions,
    scratch: &mut JpegToHtj2kScratch,
    accelerator: &mut A,
    encode_accelerator: &mut E,
) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
    validate_transcode_options(options)?;
    match options.coefficient_path {
        // Handled by the batched integer pipeline below.
        JpegToHtj2kCoefficientPath::IntegerDirect53 => {}
        JpegToHtj2kCoefficientPath::FloatDirectLinear97
            if accelerator.supports_dwt97_batch()
                || accelerator.supports_htj2k97_codeblock_batch() =>
        {
            return jpeg_float97_tile_batch_to_htj2k_with_scratch(
                tiles,
                options,
                scratch,
                accelerator,
                encode_accelerator,
            );
        }
        // Float paths without batch acceleration: process tiles one at a time.
        JpegToHtj2kCoefficientPath::FloatDirectLinear53
        | JpegToHtj2kCoefficientPath::FloatDirectLinear97 => {
            return Ok(transcode_tile_batch_individually(
                tiles,
                options,
                scratch,
                accelerator,
                encode_accelerator,
            ));
        }
    }

    // Stage 1: extract DCT blocks for every tile in parallel (rayon), keeping
    // each outcome paired with the tile's position in `tiles`.
    let extract_start = Instant::now();
    let prepared_results = tiles
        .par_iter()
        .enumerate()
        .map(|(tile_index, tile)| {
            (
                tile_index,
                prepare_integer_batch_tile(tile_index, tile.bytes, options),
            )
        })
        .collect::<Vec<_>>();
    let extract_us = extract_start.elapsed().as_micros();
    // One result slot per input tile; `None` means "not produced yet".
    let mut tile_results: Vec<Option<Result<EncodedTranscode, JpegToHtj2kError>>> =
        (0..tiles.len()).map(|_| None).collect();
    // Tiles that failed extraction get their error recorded immediately; the
    // rest continue to the transform stage.
    let mut prepared_tiles = Vec::new();
    for (tile_index, result) in prepared_results {
        match result {
            Ok(prepared) => prepared_tiles.push(prepared),
            Err(error) => tile_results[tile_index] = Some(Err(error)),
        }
    }

    // Stage 2: batched DCT-to-wavelet transform. A failure here aborts the
    // whole batch through `?`.
    let transform_start = Instant::now();
    let mut timings = TranscodeTimingReport::default();
    let (reversible_dwt53_batches, reversible_dwt53_batch_jobs) = transform_integer_batch_tiles(
        &mut prepared_tiles,
        options,
        scratch,
        accelerator,
        &mut timings,
    )?;
    let transform_us = transform_start.elapsed().as_micros();
    timings.jpeg_dct_extract_us = extract_us;
    timings.dct_to_wavelet_total_us = transform_us;
    // Counts only the tiles that survived extraction.
    timings.tile_count = prepared_tiles.len();

    // Stage 3: encode the prepared tiles and store each outcome in the slot of
    // the tile index it is returned with.
    let encode_start = Instant::now();
    let encoded_tiles = encode_integer_prepared_tiles(prepared_tiles, options, encode_accelerator);
    for (tile_index, encoded) in encoded_tiles {
        add_encode_timing_counters_from_result(&mut timings, &encoded);
        tile_results[tile_index] = Some(encoded);
    }
    let encode_us = encode_start.elapsed().as_micros();
    timings.htj2k_encode_us = encode_us;

    // Any slot still empty at this point is reported as a validation error
    // rather than being dropped from the output.
    let output_tiles = tile_results
        .into_iter()
        .map(|tile| {
            tile.unwrap_or(Err(JpegToHtj2kError::Validation(
                "batch transcode did not produce a tile result",
            )))
        })
        .collect::<Vec<_>>();
    Ok(batch_output(
        output_tiles,
        BatchTranscodeReport {
            tile_count: tiles.len(),
            // Placeholders: `batch_output` recomputes both counters from the
            // tile results.
            successful_tiles: 0,
            failed_tiles: 0,
            transformed_components: reversible_dwt53_batch_jobs,
            reversible_dwt53_batches,
            reversible_dwt53_batch_jobs,
            extract_us,
            transform_us,
            encode_us,
            timings,
            coefficient_path: options.coefficient_path,
        },
    ))
}
893
/// Batched 9/7 pipeline: extract DCT blocks for all tiles, transform them in
/// groups, then encode each prepared tile.
///
/// Does not validate `options` itself; the caller visible in this file
/// (`jpeg_tile_batch_to_htj2k_with_scratch`) validates them before
/// dispatching here.
///
/// # Errors
///
/// Returns `Err` when the batched transform stage fails; no per-tile results
/// are returned in that case. Failures specific to one tile (extraction or
/// encode) are stored in that tile's slot of the returned batch instead.
fn jpeg_float97_tile_batch_to_htj2k_with_scratch<
    A: DctToWaveletStageAccelerator,
    E: J2kEncodeStageAccelerator,
>(
    tiles: &[JpegTileBatchInput<'_>],
    options: &JpegToHtj2kOptions,
    scratch: &mut JpegToHtj2kScratch,
    accelerator: &mut A,
    encode_accelerator: &mut E,
) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
    // Stage 1: extract DCT blocks for every tile in parallel (rayon), keeping
    // each outcome paired with the tile's position in `tiles`.
    let extract_start = Instant::now();
    let prepared_results = tiles
        .par_iter()
        .enumerate()
        .map(|(tile_index, tile)| {
            (
                tile_index,
                prepare_float97_batch_tile(tile_index, tile.bytes, options),
            )
        })
        .collect::<Vec<_>>();
    let extract_us = extract_start.elapsed().as_micros();
    // One result slot per input tile; `None` means "not produced yet".
    let mut tile_results: Vec<Option<Result<EncodedTranscode, JpegToHtj2kError>>> =
        (0..tiles.len()).map(|_| None).collect();
    // Extraction failures are recorded right away; successful tiles move on
    // to the transform stage.
    let mut prepared_tiles = Vec::new();
    for (tile_index, result) in prepared_results {
        match result {
            Ok(prepared) => prepared_tiles.push(prepared),
            Err(error) => tile_results[tile_index] = Some(Err(error)),
        }
    }

    // Stage 2: batched DCT-to-wavelet transform. A failure here aborts the
    // whole batch through `?`. The returned batch count is not reported on
    // this path; only the job count is used.
    let transform_start = Instant::now();
    let mut timings = TranscodeTimingReport::default();
    let (_dwt97_batches, dwt97_batch_jobs) = transform_float97_batch_tiles(
        &mut prepared_tiles,
        options,
        scratch,
        accelerator,
        &mut timings,
    )?;
    let transform_us = transform_start.elapsed().as_micros();
    timings.jpeg_dct_extract_us = extract_us;
    timings.dct_to_wavelet_total_us = transform_us;
    // Counts only the tiles that survived extraction.
    timings.tile_count = prepared_tiles.len();

    // Stage 3: encode the prepared tiles and store each outcome in the slot of
    // the tile index it is returned with.
    let encode_start = Instant::now();
    let encoded_tiles = encode_float97_prepared_tiles(prepared_tiles, options, encode_accelerator);
    for (tile_index, encoded) in encoded_tiles {
        add_encode_timing_counters_from_result(&mut timings, &encoded);
        tile_results[tile_index] = Some(encoded);
    }
    let encode_us = encode_start.elapsed().as_micros();
    timings.htj2k_encode_us = encode_us;

    // Any slot still empty at this point is reported as a validation error
    // rather than being dropped from the output.
    let output_tiles = tile_results
        .into_iter()
        .map(|tile| {
            tile.unwrap_or(Err(JpegToHtj2kError::Validation(
                "9/7 batch transcode did not produce a tile result",
            )))
        })
        .collect::<Vec<_>>();
    Ok(batch_output(
        output_tiles,
        BatchTranscodeReport {
            tile_count: tiles.len(),
            // Placeholders: `batch_output` recomputes both counters from the
            // tile results.
            successful_tiles: 0,
            failed_tiles: 0,
            transformed_components: dwt97_batch_jobs,
            // The reversible 5/3 counters do not apply to the 9/7 path.
            reversible_dwt53_batches: 0,
            reversible_dwt53_batch_jobs: 0,
            extract_us,
            transform_us,
            encode_us,
            timings,
            coefficient_path: options.coefficient_path,
        },
    ))
}
974
975fn transcode_tile_batch_individually<
976    A: DctToWaveletStageAccelerator,
977    E: J2kEncodeStageAccelerator,
978>(
979    tiles: &[JpegTileBatchInput<'_>],
980    options: &JpegToHtj2kOptions,
981    scratch: &mut JpegToHtj2kScratch,
982    accelerator: &mut A,
983    encode_accelerator: &mut E,
984) -> EncodedTranscodeBatch {
985    let start = Instant::now();
986    let output_tiles = tiles
987        .iter()
988        .map(|tile| {
989            jpeg_to_htj2k_with_scratch(
990                tile.bytes,
991                options,
992                scratch,
993                accelerator,
994                encode_accelerator,
995            )
996        })
997        .collect::<Vec<_>>();
998    let mut timings = aggregate_tile_timings(&output_tiles);
999    timings.tile_count = output_tiles.iter().filter(|tile| tile.is_ok()).count();
1000    let elapsed_us = start.elapsed().as_micros();
1001    if timings.dct_to_wavelet_total_us == 0 {
1002        timings.dct_to_wavelet_total_us = elapsed_us
1003            .saturating_sub(timings.jpeg_dct_extract_us)
1004            .saturating_sub(timings.htj2k_encode_us);
1005    }
1006    batch_output(
1007        output_tiles,
1008        BatchTranscodeReport {
1009            tile_count: tiles.len(),
1010            successful_tiles: 0,
1011            failed_tiles: 0,
1012            transformed_components: timings.component_count,
1013            reversible_dwt53_batches: 0,
1014            reversible_dwt53_batch_jobs: 0,
1015            extract_us: timings.jpeg_dct_extract_us,
1016            transform_us: timings.dct_to_wavelet_total_us,
1017            encode_us: timings.htj2k_encode_us,
1018            timings,
1019            coefficient_path: options.coefficient_path,
1020        },
1021    )
1022}
1023
1024fn aggregate_tile_timings(
1025    tiles: &[Result<EncodedTranscode, JpegToHtj2kError>],
1026) -> TranscodeTimingReport {
1027    let mut timings = TranscodeTimingReport::default();
1028    for tile in tiles.iter().filter_map(|tile| tile.as_ref().ok()) {
1029        timings.add_assign(tile.report.timings);
1030    }
1031    timings
1032}
1033
1034fn batch_output(
1035    tiles: Vec<Result<EncodedTranscode, JpegToHtj2kError>>,
1036    mut report: BatchTranscodeReport,
1037) -> EncodedTranscodeBatch {
1038    report.successful_tiles = tiles.iter().filter(|tile| tile.is_ok()).count();
1039    report.failed_tiles = tiles.len().saturating_sub(report.successful_tiles);
1040    EncodedTranscodeBatch { tiles, report }
1041}
1042
/// Per-tile working state for the batched reversible 5/3 path, created by
/// `prepare_integer_batch_tile`.
struct IntegerBatchTile {
    /// Index supplied by the caller of `prepare_integer_batch_tile`.
    tile_index: usize,
    /// DCT blocks extracted from the tile's JPEG bytes.
    jpeg: JpegDctImage,
    /// One `(x_rsiz, y_rsiz)` pair per component.
    component_sampling: Vec<(u8, u8)>,
    /// Number of decomposition levels resolved for this tile.
    decomposition_levels: u8,
    /// `true` when every entry of `component_sampling` is `(1, 1)`.
    all_unit_sampled: bool,
    /// One report entry per component, built from the component geometry and
    /// sampling.
    component_reports: Vec<TranscodeComponentReport>,
    /// One slot per component, initialised to `None`.
    // NOTE(review): presumably filled by the transform stage — confirm.
    precomputed_components: Vec<Option<PrecomputedHtj2k53Component>>,
    /// Starts empty.
    // NOTE(review): the four validation buffers are presumably populated only
    // when reference validation is requested — confirm.
    float_validation_actual: Vec<i32>,
    /// Starts empty.
    float_validation_expected: Vec<i32>,
    /// Starts empty.
    integer_validation_actual: Vec<i32>,
    /// Starts empty.
    integer_validation_expected: Vec<i32>,
    /// Initialised with the tile's extraction time and a tile count of one.
    timings: TranscodeTimingReport,
}
1057
/// Per-tile working state for the batched 9/7 path, created by
/// `prepare_float97_batch_tile`.
struct Float97BatchTile {
    /// Index supplied by the caller of `prepare_float97_batch_tile`.
    tile_index: usize,
    /// DCT blocks extracted from the tile's JPEG bytes.
    jpeg: JpegDctImage,
    /// One `(x_rsiz, y_rsiz)` pair per component.
    component_sampling: Vec<(u8, u8)>,
    /// Number of decomposition levels resolved for this tile.
    decomposition_levels: u8,
    /// `true` when every entry of `component_sampling` is `(1, 1)`.
    all_unit_sampled: bool,
    /// One report entry per component, built from the component geometry and
    /// sampling.
    component_reports: Vec<TranscodeComponentReport>,
    /// One slot per component, initialised to `None`.
    precomputed_components: Vec<Option<PrecomputedHtj2k97Component>>,
    /// Tile-local byte buffer that the `payload_range` of each stored compact
    /// code block indexes into. Starts empty and grows as
    /// `store_compact_preencoded_component` appends block payloads.
    preencoded_compact_payload: Vec<u8>,
    /// One slot per component, initialised to `None`; set by
    /// `store_compact_preencoded_component`.
    preencoded_compact_components: Vec<Option<PreencodedHtj2k97CompactComponent>>,
    /// One slot per component, initialised to `None`.
    preencoded_components: Vec<Option<PreencodedHtj2k97Component>>,
    /// One slot per component, initialised to `None`.
    prequantized_components: Vec<Option<PrequantizedHtj2k97Component>>,
    /// Starts empty.
    // NOTE(review): presumably populated only when float reference validation
    // is requested — confirm.
    float_validation_actual: Vec<i32>,
    /// Starts empty.
    float_validation_expected: Vec<i32>,
    /// Initialised with the tile's extraction time and a tile count of one.
    timings: TranscodeTimingReport,
}
1074
/// Bookkeeping subset of `Float97BatchTile`: the same identification, report,
/// validation and timing fields, without the sampling list or any of the
/// per-component coefficient/payload storage.
// NOTE(review): not constructed in the part of the file reviewed here;
// presumably carries tile metadata once the component data has been handed to
// a batched encode — confirm against its use site.
struct Float97PrecomputedBatchRecord {
    tile_index: usize,
    jpeg: JpegDctImage,
    decomposition_levels: u8,
    all_unit_sampled: bool,
    component_reports: Vec<TranscodeComponentReport>,
    float_validation_actual: Vec<i32>,
    float_validation_expected: Vec<i32>,
    timings: TranscodeTimingReport,
}
1085
/// Identifies one component of one prepared tile inside a batch.
#[derive(Clone, Copy)]
struct BatchComponentRef {
    /// Position of the tile within the prepared-tile slice handed to the
    /// grouping functions (not the tile's own `tile_index` field).
    tile_index: usize,
    /// Position of the component within that tile's `jpeg.components`.
    component_index: usize,
}
1091
1092fn prepare_integer_batch_tile(
1093    tile_index: usize,
1094    bytes: &[u8],
1095    options: &JpegToHtj2kOptions,
1096) -> Result<IntegerBatchTile, JpegToHtj2kError> {
1097    let extract_start = Instant::now();
1098    let jpeg = extract_dct_blocks(bytes, DctExtractOptions::default())?;
1099    let timings = TranscodeTimingReport {
1100        jpeg_dct_extract_us: extract_start.elapsed().as_micros(),
1101        tile_count: 1,
1102        ..TranscodeTimingReport::default()
1103    };
1104    if jpeg.components.is_empty() || jpeg.components.len() > 4 {
1105        return Err(JpegToHtj2kError::Unsupported(
1106            "unsupported JPEG component count for jpeg_to_htj2k",
1107        ));
1108    }
1109    let component_sampling =
1110        component_sampling_for_jpeg(&jpeg.components, jpeg.width, jpeg.height)?;
1111    let decomposition_levels = decomposition_levels_for_components(
1112        &jpeg.components,
1113        options.encode_options.num_decomposition_levels,
1114    )?;
1115    let all_unit_sampled = component_sampling
1116        .iter()
1117        .all(|&(x_rsiz, y_rsiz)| x_rsiz == 1 && y_rsiz == 1);
1118    let component_reports = jpeg
1119        .components
1120        .iter()
1121        .zip(component_sampling.iter().copied())
1122        .map(|(component, (x_rsiz, y_rsiz))| TranscodeComponentReport {
1123            component_index: component.component_index,
1124            width: component.width,
1125            height: component.height,
1126            block_cols: component.block_cols,
1127            block_rows: component.block_rows,
1128            x_rsiz,
1129            y_rsiz,
1130        })
1131        .collect::<Vec<_>>();
1132    let precomputed_components = (0..jpeg.components.len()).map(|_| None).collect();
1133
1134    Ok(IntegerBatchTile {
1135        tile_index,
1136        jpeg,
1137        component_sampling,
1138        decomposition_levels,
1139        all_unit_sampled,
1140        component_reports,
1141        precomputed_components,
1142        float_validation_actual: Vec::new(),
1143        float_validation_expected: Vec::new(),
1144        integer_validation_actual: Vec::new(),
1145        integer_validation_expected: Vec::new(),
1146        timings,
1147    })
1148}
1149
1150fn prepare_float97_batch_tile(
1151    tile_index: usize,
1152    bytes: &[u8],
1153    options: &JpegToHtj2kOptions,
1154) -> Result<Float97BatchTile, JpegToHtj2kError> {
1155    let extract_start = Instant::now();
1156    let jpeg = extract_dct_blocks(bytes, DctExtractOptions::dequantized_only())?;
1157    let timings = TranscodeTimingReport {
1158        jpeg_dct_extract_us: extract_start.elapsed().as_micros(),
1159        tile_count: 1,
1160        ..TranscodeTimingReport::default()
1161    };
1162    if jpeg.components.is_empty() || jpeg.components.len() > 4 {
1163        return Err(JpegToHtj2kError::Unsupported(
1164            "unsupported JPEG component count for jpeg_to_htj2k",
1165        ));
1166    }
1167    let component_sampling =
1168        component_sampling_for_jpeg(&jpeg.components, jpeg.width, jpeg.height)?;
1169    let decomposition_levels = decomposition_levels_for_components(
1170        &jpeg.components,
1171        options.encode_options.num_decomposition_levels,
1172    )?;
1173    let all_unit_sampled = component_sampling
1174        .iter()
1175        .all(|&(x_rsiz, y_rsiz)| x_rsiz == 1 && y_rsiz == 1);
1176    let component_reports = jpeg
1177        .components
1178        .iter()
1179        .zip(component_sampling.iter().copied())
1180        .map(|(component, (x_rsiz, y_rsiz))| TranscodeComponentReport {
1181            component_index: component.component_index,
1182            width: component.width,
1183            height: component.height,
1184            block_cols: component.block_cols,
1185            block_rows: component.block_rows,
1186            x_rsiz,
1187            y_rsiz,
1188        })
1189        .collect::<Vec<_>>();
1190    let precomputed_components = (0..jpeg.components.len()).map(|_| None).collect();
1191    let preencoded_compact_components = (0..jpeg.components.len()).map(|_| None).collect();
1192    let preencoded_components = (0..jpeg.components.len()).map(|_| None).collect();
1193    let prequantized_components = (0..jpeg.components.len()).map(|_| None).collect();
1194
1195    Ok(Float97BatchTile {
1196        tile_index,
1197        jpeg,
1198        component_sampling,
1199        decomposition_levels,
1200        all_unit_sampled,
1201        component_reports,
1202        precomputed_components,
1203        preencoded_compact_payload: Vec::new(),
1204        preencoded_compact_components,
1205        preencoded_components,
1206        prequantized_components,
1207        float_validation_actual: Vec::new(),
1208        float_validation_expected: Vec::new(),
1209        timings,
1210    })
1211}
1212
1213fn transform_integer_batch_tiles<A: DctToWaveletStageAccelerator>(
1214    tiles: &mut [IntegerBatchTile],
1215    options: &JpegToHtj2kOptions,
1216    scratch: &mut JpegToHtj2kScratch,
1217    accelerator: &mut A,
1218    timings: &mut TranscodeTimingReport,
1219) -> Result<(usize, usize), JpegToHtj2kError> {
1220    let groups = batch_component_groups(tiles);
1221    let mut batch_count = 0usize;
1222    let mut job_count = 0usize;
1223
1224    for group in groups {
1225        batch_count = batch_count.saturating_add(1);
1226        job_count = job_count.saturating_add(group.len());
1227        let wavelets =
1228            integer_wavelets_for_batch_group(&group, tiles, scratch, accelerator, timings)?;
1229        for (component_ref, wavelet) in group.into_iter().zip(wavelets) {
1230            store_integer_batch_wavelet(component_ref, &wavelet, tiles, options, scratch)?;
1231        }
1232    }
1233
1234    Ok((batch_count, job_count))
1235}
1236
1237fn transform_float97_batch_tiles<A: DctToWaveletStageAccelerator>(
1238    tiles: &mut [Float97BatchTile],
1239    options: &JpegToHtj2kOptions,
1240    scratch: &mut JpegToHtj2kScratch,
1241    accelerator: &mut A,
1242    timings: &mut TranscodeTimingReport,
1243) -> Result<(usize, usize), JpegToHtj2kError> {
1244    let groups = float97_batch_component_groups(tiles);
1245    let grouped_i16_preencoded = try_store_grouped_i16_preencoded_float97_batches(
1246        &groups,
1247        tiles,
1248        options,
1249        accelerator,
1250        timings,
1251    )?;
1252    let mut batch_count = 0usize;
1253    let mut job_count = 0usize;
1254
1255    for (group_index, group) in groups.into_iter().enumerate() {
1256        batch_count = batch_count.saturating_add(1);
1257        job_count = job_count.saturating_add(group.len());
1258        if grouped_i16_preencoded
1259            .get(group_index)
1260            .copied()
1261            .unwrap_or(false)
1262        {
1263            continue;
1264        }
1265        if try_store_prequantized_float97_batch_group(&group, tiles, options, accelerator, timings)?
1266        {
1267            continue;
1268        }
1269        let wavelets =
1270            float97_wavelets_for_batch_group(&group, tiles, scratch, accelerator, timings)?;
1271        for (component_ref, wavelet) in group.into_iter().zip(wavelets) {
1272            store_float97_batch_wavelet(component_ref, &wavelet, tiles, options, scratch)?;
1273        }
1274    }
1275
1276    Ok((batch_count, job_count))
1277}
1278
1279fn batch_component_groups(tiles: &[IntegerBatchTile]) -> Vec<Vec<BatchComponentRef>> {
1280    let mut groups: Vec<Vec<BatchComponentRef>> = Vec::new();
1281
1282    for (tile_index, tile) in tiles.iter().enumerate() {
1283        for (component_index, component) in tile.jpeg.components.iter().enumerate() {
1284            let component_ref = BatchComponentRef {
1285                tile_index,
1286                component_index,
1287            };
1288            if let Some(group) = groups.iter_mut().find(|group| {
1289                let first = group[0];
1290                same_batch_component_key(
1291                    &tiles[first.tile_index],
1292                    first.component_index,
1293                    tile,
1294                    component_index,
1295                )
1296            }) {
1297                group.push(component_ref);
1298            } else {
1299                let _ = component;
1300                groups.push(vec![component_ref]);
1301            }
1302        }
1303    }
1304
1305    groups
1306}
1307
1308fn float97_batch_component_groups(tiles: &[Float97BatchTile]) -> Vec<Vec<BatchComponentRef>> {
1309    let mut groups: Vec<Vec<BatchComponentRef>> = Vec::new();
1310
1311    for (tile_index, tile) in tiles.iter().enumerate() {
1312        for component_index in 0..tile.jpeg.components.len() {
1313            let component_ref = BatchComponentRef {
1314                tile_index,
1315                component_index,
1316            };
1317            if let Some(group) = groups.iter_mut().find(|group| {
1318                let first = group[0];
1319                same_float97_batch_component_key(
1320                    &tiles[first.tile_index],
1321                    first.component_index,
1322                    tile,
1323                    component_index,
1324                )
1325            }) {
1326                group.push(component_ref);
1327            } else {
1328                groups.push(vec![component_ref]);
1329            }
1330        }
1331    }
1332
1333    groups
1334}
1335
1336fn same_batch_component_key(
1337    left_tile: &IntegerBatchTile,
1338    left_component_index: usize,
1339    right_tile: &IntegerBatchTile,
1340    right_component_index: usize,
1341) -> bool {
1342    let left = &left_tile.jpeg.components[left_component_index];
1343    let right = &right_tile.jpeg.components[right_component_index];
1344    left.component_index == right.component_index
1345        && left.width == right.width
1346        && left.height == right.height
1347        && left.block_cols == right.block_cols
1348        && left.block_rows == right.block_rows
1349        && left_tile.component_sampling[left_component_index]
1350            == right_tile.component_sampling[right_component_index]
1351}
1352
1353fn same_float97_batch_component_key(
1354    left_tile: &Float97BatchTile,
1355    left_component_index: usize,
1356    right_tile: &Float97BatchTile,
1357    right_component_index: usize,
1358) -> bool {
1359    let left = &left_tile.jpeg.components[left_component_index];
1360    let right = &right_tile.jpeg.components[right_component_index];
1361    left.width == right.width
1362        && left.height == right.height
1363        && left.block_cols == right.block_cols
1364        && left.block_rows == right.block_rows
1365        && left_tile.component_sampling[left_component_index]
1366            == right_tile.component_sampling[right_component_index]
1367}
1368
/// Produces one integer wavelet per component of `group`, in group order.
///
/// The whole group is first offered to the accelerator as a single
/// reversible 5/3 batch. When the accelerator returns `None`, each component
/// is transformed individually through
/// `integer_direct_wavelet_from_component`.
///
/// # Errors
///
/// Fails when a DCT job cannot be built for a component, when the accelerator
/// reports an error, when the accelerator returns a different number of
/// results than there are components, or when the per-component fallback
/// fails.
fn integer_wavelets_for_batch_group<A: DctToWaveletStageAccelerator>(
    group: &[BatchComponentRef],
    tiles: &[IntegerBatchTile],
    scratch: &mut JpegToHtj2kScratch,
    accelerator: &mut A,
    timings: &mut TranscodeTimingReport,
) -> Result<Vec<IntegerWavelet>, JpegToHtj2kError> {
    // Build one accelerator job per component; the first failure aborts.
    let jobs = group
        .iter()
        .map(|component_ref| {
            integer_dct_job_for_component(
                &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index],
            )
        })
        .collect::<Result<Vec<_>, _>>()?;
    record_batch_attempt(timings, group.len());
    let accelerator_start = Instant::now();
    let accelerated = accelerator
        .dct_grid_to_reversible_dwt53_batch(&jobs)
        .map_err(JpegToHtj2kError::Accelerator)?;
    // The accelerator call is timed whether it produced results (`Some`) or
    // declined the batch (`None`); an accelerator error returns above before
    // this accumulation.
    timings.dct_to_wavelet_accelerator_us = timings
        .dct_to_wavelet_accelerator_us
        .saturating_add(accelerator_start.elapsed().as_micros());

    if let Some(first_levels) = accelerated {
        // Results are paired with `group` by position, so the counts must
        // match exactly.
        if first_levels.len() != group.len() {
            return Err(JpegToHtj2kError::Validation(
                "reversible 5/3 batch accelerator returned wrong component count",
            ));
        }
        timings.component_count = timings.component_count.saturating_add(group.len());
        record_accelerator_dispatch(timings, group.len());
        // Turn each accelerator first-level result into a wavelet using the
        // owning tile's decomposition level count.
        let decompose_start = Instant::now();
        let wavelets = first_levels
            .into_iter()
            .zip(group.iter().copied())
            .map(|(first_level, component_ref)| {
                integer_wavelet_from_first_level(
                    first_level,
                    tiles[component_ref.tile_index].decomposition_levels,
                )
            })
            .collect();
        timings.dwt_decompose_us = timings
            .dwt_decompose_us
            .saturating_add(decompose_start.elapsed().as_micros());
        return Ok(wavelets);
    }

    // The accelerator declined the batch: transform each component on its own.
    group
        .iter()
        .map(|component_ref| {
            integer_direct_wavelet_from_component(
                &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index],
                tiles[component_ref.tile_index].decomposition_levels,
                scratch,
                accelerator,
                timings,
            )
        })
        .collect()
}
1431
1432fn i16_htj2k97_jobs_for_batch_group<'a>(
1433    group: &[BatchComponentRef],
1434    tiles: &'a [Float97BatchTile],
1435) -> Result<Vec<DctGridI16ToHtj2k97CodeBlockJob<'a>>, JpegToHtj2kError> {
1436    group
1437        .iter()
1438        .map(|component_ref| {
1439            let tile = &tiles[component_ref.tile_index];
1440            let component = &tile.jpeg.components[component_ref.component_index];
1441            let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
1442            validate_component_block_grid(component)?;
1443            Ok(DctGridI16ToHtj2k97CodeBlockJob {
1444                dequantized_blocks: &component.dequantized_blocks,
1445                block_cols: component.block_cols as usize,
1446                block_rows: component.block_rows as usize,
1447                width: component.width as usize,
1448                height: component.height as usize,
1449                x_rsiz,
1450                y_rsiz,
1451            })
1452        })
1453        .collect()
1454}
1455
1456fn store_compact_preencoded_component(
1457    tile: &mut Float97BatchTile,
1458    component_index: usize,
1459    batch_payload: &[u8],
1460    mut component: PreencodedHtj2k97CompactComponent,
1461) -> Result<(), JpegToHtj2kError> {
1462    if component_index >= tile.preencoded_compact_components.len() {
1463        return Err(JpegToHtj2kError::Validation(
1464            "compact preencoded component index out of range",
1465        ));
1466    }
1467
1468    for resolution in &mut component.resolutions {
1469        for subband in &mut resolution.subbands {
1470            for block in &mut subband.code_blocks {
1471                if block.payload_range.start > block.payload_range.end
1472                    || block.payload_range.end > batch_payload.len()
1473                {
1474                    return Err(JpegToHtj2kError::Validation(
1475                        "compact preencoded payload range out of bounds",
1476                    ));
1477                }
1478                let start = tile.preencoded_compact_payload.len();
1479                tile.preencoded_compact_payload
1480                    .extend_from_slice(&batch_payload[block.payload_range.clone()]);
1481                let end = tile.preencoded_compact_payload.len();
1482                block.payload_range = start..end;
1483            }
1484        }
1485    }
1486
1487    tile.preencoded_compact_components[component_index] = Some(component);
1488    Ok(())
1489}
1490
/// Tries to push several component groups through the accelerator's grouped
/// i16 9/7 pre-encoded batch entry points in a single submission.
///
/// Returns one flag per entry of `groups`; a `true` flag means the components
/// of that group were stored on their tiles (in compact or regular pre-encoded
/// form) by this call.
///
/// All flags stay `false`, and no jobs are submitted, when the accelerator does
/// not report i16 pre-encoded batch support, when float-reference validation is
/// requested, when there is at most one group, or when at most one group is
/// eligible. A group is eligible when every tile it references uses exactly one
/// decomposition level.
///
/// # Errors
///
/// Propagates job-construction errors, wraps accelerator failures in
/// `JpegToHtj2kError::Accelerator`, and returns `JpegToHtj2kError::Validation`
/// when the accelerator hands back an unexpected number of groups or
/// components. Groups processed before a count mismatch is detected have
/// already been stored on their tiles.
#[allow(clippy::too_many_lines)]
fn try_store_grouped_i16_preencoded_float97_batches<A: DctToWaveletStageAccelerator>(
    groups: &[Vec<BatchComponentRef>],
    tiles: &mut [Float97BatchTile],
    options: &JpegToHtj2kOptions,
    accelerator: &mut A,
    timings: &mut TranscodeTimingReport,
) -> Result<Vec<bool>, JpegToHtj2kError> {
    let mut handled = vec![false; groups.len()];
    // Grouped submission needs accelerator support, no float-reference
    // validation, and more than one group.
    if !accelerator.supports_htj2k97_i16_preencoded_batch()
        || options.validate_against_float_reference
        || groups.len() <= 1
    {
        return Ok(handled);
    }

    // Keep only the groups whose tiles all use a single decomposition level.
    let eligible_indices = groups
        .iter()
        .enumerate()
        .filter_map(|(index, group)| {
            let eligible = group
                .iter()
                .all(|component_ref| tiles[component_ref.tile_index].decomposition_levels == 1);
            eligible.then_some(index)
        })
        .collect::<Vec<_>>();
    if eligible_indices.len() <= 1 {
        return Ok(handled);
    }

    let codeblock_options = htj2k97_codeblock_options(&options.encode_options);
    let total_jobs = eligible_indices
        .iter()
        .map(|&index| groups[index].len())
        .sum::<usize>();
    // One accelerator attempt covers the jobs of every eligible group.
    record_accelerator_attempt(timings, total_jobs);
    // The timer starts before job construction, so the accelerator time
    // recorded below includes building the jobs.
    let accelerator_start = Instant::now();
    let jobs_by_group = eligible_indices
        .iter()
        .map(|&index| i16_htj2k97_jobs_for_batch_group(&groups[index], tiles))
        .collect::<Result<Vec<_>, JpegToHtj2kError>>()?;
    let batches = jobs_by_group
        .iter()
        .map(|jobs| DctGridI16ToHtj2k97CodeBlockBatch { jobs })
        .collect::<Vec<_>>();
    // Preferred path: compact pre-encoded output, attempted only when the
    // accelerator reports support for it.
    let compact_grouped_components = if accelerator.supports_htj2k97_compact_preencoded_batch() {
        accelerator
            .dct_grid_i16_to_htj2k97_compact_preencoded_batch_groups(&batches, codeblock_options)
            .map_err(JpegToHtj2kError::Accelerator)?
    } else {
        None
    };
    // NOTE(review): this query also runs when the compact path is unsupported
    // and no accelerator call was made above; whether that can re-add timings
    // from an earlier call depends on `last_dwt97_batch_stage_timings` — confirm.
    if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
        add_dwt97_batch_stage_timings(timings, stage_timings);
    }
    if let Some(compact_grouped_components) = compact_grouped_components {
        timings.dct_to_wavelet_accelerator_us = timings
            .dct_to_wavelet_accelerator_us
            .saturating_add(accelerator_start.elapsed().as_micros());
        let compact_payload = compact_grouped_components.payload;
        let compact_groups = compact_grouped_components.groups;
        if compact_groups.len() != eligible_indices.len() {
            return Err(JpegToHtj2kError::Validation(
                "9/7 grouped i16 compact preencoded accelerator returned wrong group count",
            ));
        }
        // Returned groups are matched to eligible groups by position.
        for (&group_index, components) in eligible_indices.iter().zip(compact_groups) {
            let group = &groups[group_index];
            if components.len() != group.len() {
                return Err(JpegToHtj2kError::Validation(
                    "9/7 grouped i16 compact preencoded accelerator returned wrong component count",
                ));
            }

            timings.component_count = timings.component_count.saturating_add(group.len());
            record_batch_dispatch(timings, group.len());
            // Every component of the group is stored against the same shared payload.
            for (component_ref, component) in group.iter().copied().zip(components) {
                store_compact_preencoded_component(
                    &mut tiles[component_ref.tile_index],
                    component_ref.component_index,
                    &compact_payload,
                    component,
                )?;
            }
            handled[group_index] = true;
        }
        return Ok(handled);
    }

    // Compact output unavailable: fall back to the regular grouped pre-encoded call.
    let grouped_components = accelerator
        .dct_grid_i16_to_htj2k97_preencoded_batch_groups(&batches, codeblock_options)
        .map_err(JpegToHtj2kError::Accelerator)?;
    if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
        add_dwt97_batch_stage_timings(timings, stage_timings);
    }
    // Elapsed time is measured from `accelerator_start`, so it also covers the
    // compact attempt above when one was made.
    timings.dct_to_wavelet_accelerator_us = timings
        .dct_to_wavelet_accelerator_us
        .saturating_add(accelerator_start.elapsed().as_micros());

    // `None` means the accelerator produced nothing; every flag stays `false`.
    let Some(grouped_components) = grouped_components else {
        return Ok(handled);
    };
    if grouped_components.len() != eligible_indices.len() {
        return Err(JpegToHtj2kError::Validation(
            "9/7 grouped i16 preencoded accelerator returned wrong group count",
        ));
    }

    for (&group_index, components) in eligible_indices.iter().zip(grouped_components) {
        let group = &groups[group_index];
        if components.len() != group.len() {
            return Err(JpegToHtj2kError::Validation(
                "9/7 grouped i16 preencoded accelerator returned wrong component count",
            ));
        }

        timings.component_count = timings.component_count.saturating_add(group.len());
        record_batch_dispatch(timings, group.len());
        for (component_ref, component) in group.iter().copied().zip(components) {
            tiles[component_ref.tile_index].preencoded_components[component_ref.component_index] =
                Some(component);
        }
        handled[group_index] = true;
    }

    Ok(handled)
}
1618
/// Tries to produce pre-encoded or prequantized 9/7 output for one component
/// group through the accelerator's batch entry points.
///
/// The paths are attempted in this order, and the first one that yields
/// components wins:
///
/// 1. i16 compact pre-encoded batch (only when the accelerator reports support
///    for both the i16 and the compact batch),
/// 2. i16 pre-encoded batch,
/// 3. pre-encoded batch on DCT blocks repacked with `dct_blocks_to_8x8_f64`,
/// 4. code-block batch on the same repacked blocks, stored as prequantized
///    components.
///
/// Returns `Ok(true)` when the group's components were stored on their tiles
/// and `Ok(false)` when the group is not eligible or no path produced output.
/// The group is not eligible when the accelerator reports neither code-block
/// nor i16 pre-encoded batch support, when float-reference validation is
/// requested, or when any referenced tile does not use exactly one
/// decomposition level.
///
/// # Errors
///
/// Propagates job-construction and block-grid validation errors, wraps
/// accelerator failures in `JpegToHtj2kError::Accelerator`, and returns
/// `JpegToHtj2kError::Validation` when a path returns the wrong number of
/// components.
#[allow(clippy::too_many_lines)]
fn try_store_prequantized_float97_batch_group<A: DctToWaveletStageAccelerator>(
    group: &[BatchComponentRef],
    tiles: &mut [Float97BatchTile],
    options: &JpegToHtj2kOptions,
    accelerator: &mut A,
    timings: &mut TranscodeTimingReport,
) -> Result<bool, JpegToHtj2kError> {
    if !(accelerator.supports_htj2k97_codeblock_batch()
        || accelerator.supports_htj2k97_i16_preencoded_batch())
        || options.validate_against_float_reference
        || group
            .iter()
            .any(|component_ref| tiles[component_ref.tile_index].decomposition_levels != 1)
    {
        return Ok(false);
    }

    let codeblock_options = htj2k97_codeblock_options(&options.encode_options);
    // Paths 1 and 2 work on i16 jobs.
    if accelerator.supports_htj2k97_i16_preencoded_batch() {
        let jobs = i16_htj2k97_jobs_for_batch_group(group, tiles)?;

        record_accelerator_attempt(timings, group.len());
        let accelerator_start = Instant::now();
        let compact_preencoded_components =
            if accelerator.supports_htj2k97_compact_preencoded_batch() {
                accelerator
                    .dct_grid_i16_to_htj2k97_compact_preencoded_batch(&jobs, codeblock_options)
                    .map_err(JpegToHtj2kError::Accelerator)?
            } else {
                None
            };
        // NOTE(review): this query also runs when the compact path is
        // unsupported and no accelerator call was made above; whether that can
        // re-add earlier timings depends on `last_dwt97_batch_stage_timings` — confirm.
        if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
            add_dwt97_batch_stage_timings(timings, stage_timings);
        }
        if let Some(compact_batch) = compact_preencoded_components {
            timings.dct_to_wavelet_accelerator_us = timings
                .dct_to_wavelet_accelerator_us
                .saturating_add(accelerator_start.elapsed().as_micros());
            if compact_batch.components.len() != group.len() {
                return Err(JpegToHtj2kError::Validation(
                    "9/7 i16 compact preencoded accelerator returned wrong component count",
                ));
            }

            timings.component_count = timings.component_count.saturating_add(group.len());
            record_batch_dispatch(timings, group.len());
            // Every component of the group is stored against the same shared payload.
            for (component_ref, component) in group.iter().copied().zip(compact_batch.components) {
                store_compact_preencoded_component(
                    &mut tiles[component_ref.tile_index],
                    component_ref.component_index,
                    &compact_batch.payload,
                    component,
                )?;
            }

            return Ok(true);
        }

        // Path 2: regular i16 pre-encoded batch.
        let preencoded_components = accelerator
            .dct_grid_i16_to_htj2k97_preencoded_batch(&jobs, codeblock_options)
            .map_err(JpegToHtj2kError::Accelerator)?;
        if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
            add_dwt97_batch_stage_timings(timings, stage_timings);
        }
        // Measured from `accelerator_start`, so this also covers the compact
        // attempt above when one was made.
        timings.dct_to_wavelet_accelerator_us = timings
            .dct_to_wavelet_accelerator_us
            .saturating_add(accelerator_start.elapsed().as_micros());
        if let Some(components) = preencoded_components {
            if components.len() != group.len() {
                return Err(JpegToHtj2kError::Validation(
                    "9/7 i16 preencoded accelerator returned wrong component count",
                ));
            }

            timings.component_count = timings.component_count.saturating_add(group.len());
            record_batch_dispatch(timings, group.len());
            for (component_ref, component) in group.iter().copied().zip(components) {
                tiles[component_ref.tile_index].preencoded_components
                    [component_ref.component_index] = Some(component);
            }

            return Ok(true);
        }
        // Neither i16 path produced output; fall through to the repacked paths.
    }

    // Paths 3 and 4 consume blocks repacked by `dct_blocks_to_8x8_f64`; the
    // repack runs in parallel over the group and is timed separately.
    let repack_start = Instant::now();
    let block_storage = group
        .par_iter()
        .map(|component_ref| {
            dct_blocks_to_8x8_f64(
                &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index]
                    .dequantized_blocks,
            )
        })
        .collect::<Vec<_>>();
    timings.jpeg_dct_repack_us = timings
        .jpeg_dct_repack_us
        .saturating_add(repack_start.elapsed().as_micros());

    // One job per component; each job borrows its repacked blocks from `block_storage`.
    let jobs = group
        .iter()
        .zip(block_storage.iter())
        .map(|(component_ref, blocks)| {
            let tile = &tiles[component_ref.tile_index];
            let component = &tile.jpeg.components[component_ref.component_index];
            let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
            validate_component_block_grid(component)?;
            Ok(DctGridToHtj2k97CodeBlockJob {
                blocks,
                block_cols: component.block_cols as usize,
                block_rows: component.block_rows as usize,
                width: component.width as usize,
                height: component.height as usize,
                x_rsiz,
                y_rsiz,
            })
        })
        .collect::<Result<Vec<_>, JpegToHtj2kError>>()?;

    // Counted as a separate attempt from the i16 attempt recorded above.
    record_accelerator_attempt(timings, group.len());
    let accelerator_start = Instant::now();
    // Path 3: pre-encoded batch on the repacked blocks.
    let preencoded_components = accelerator
        .dct_grid_to_htj2k97_preencoded_batch(&jobs, codeblock_options)
        .map_err(JpegToHtj2kError::Accelerator)?;
    if let Some(components) = preencoded_components {
        if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
            add_dwt97_batch_stage_timings(timings, stage_timings);
        }
        timings.dct_to_wavelet_accelerator_us = timings
            .dct_to_wavelet_accelerator_us
            .saturating_add(accelerator_start.elapsed().as_micros());
        if components.len() != group.len() {
            return Err(JpegToHtj2kError::Validation(
                "9/7 preencoded accelerator returned wrong component count",
            ));
        }

        timings.component_count = timings.component_count.saturating_add(group.len());
        record_batch_dispatch(timings, group.len());
        for (component_ref, component) in group.iter().copied().zip(components) {
            tiles[component_ref.tile_index].preencoded_components[component_ref.component_index] =
                Some(component);
        }

        return Ok(true);
    }

    // Path 4: code-block batch; its output is stored as prequantized components.
    let accelerated_components = accelerator
        .dct_grid_to_htj2k97_codeblock_batch(&jobs, codeblock_options)
        .map_err(JpegToHtj2kError::Accelerator)?;
    if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
        add_dwt97_batch_stage_timings(timings, stage_timings);
    }
    // Measured from `accelerator_start`, so this also covers the path 3 call.
    timings.dct_to_wavelet_accelerator_us = timings
        .dct_to_wavelet_accelerator_us
        .saturating_add(accelerator_start.elapsed().as_micros());

    // `None` here means no batch path produced output for this group.
    let Some(components) = accelerated_components else {
        return Ok(false);
    };
    if components.len() != group.len() {
        return Err(JpegToHtj2kError::Validation(
            "9/7 code-block accelerator returned wrong component count",
        ));
    }

    timings.component_count = timings.component_count.saturating_add(group.len());
    record_batch_dispatch(timings, group.len());
    for (component_ref, component) in group.iter().copied().zip(components) {
        tiles[component_ref.tile_index].prequantized_components[component_ref.component_index] =
            Some(component);
    }

    Ok(true)
}
1795
1796fn htj2k97_codeblock_options(options: &JpegToHtj2kEncodeOptions) -> Htj2k97CodeBlockOptions {
1797    Htj2k97CodeBlockOptions {
1798        bit_depth: 8,
1799        guard_bits: options.guard_bits.max(2),
1800        code_block_width_exp: options.code_block_width_exp,
1801        code_block_height_exp: options.code_block_height_exp,
1802        irreversible_quantization_scale: options.irreversible_quantization_scale,
1803        irreversible_quantization_subband_scales: options.irreversible_quantization_subband_scales,
1804    }
1805}
1806
1807fn native_progression_order(
1808    progression: J2kProgressionOrder,
1809) -> j2k_native::EncodeProgressionOrder {
1810    match progression {
1811        J2kProgressionOrder::Lrcp => j2k_native::EncodeProgressionOrder::Lrcp,
1812        J2kProgressionOrder::Rlcp => j2k_native::EncodeProgressionOrder::Rlcp,
1813        J2kProgressionOrder::Rpcl => j2k_native::EncodeProgressionOrder::Rpcl,
1814        J2kProgressionOrder::Pcrl => j2k_native::EncodeProgressionOrder::Pcrl,
1815        J2kProgressionOrder::Cprl => j2k_native::EncodeProgressionOrder::Cprl,
1816    }
1817}
1818
1819fn float97_wavelets_for_batch_group<A: DctToWaveletStageAccelerator>(
1820    group: &[BatchComponentRef],
1821    tiles: &[Float97BatchTile],
1822    scratch: &mut JpegToHtj2kScratch,
1823    accelerator: &mut A,
1824    timings: &mut TranscodeTimingReport,
1825) -> Result<Vec<ComponentWavelet97>, JpegToHtj2kError> {
1826    let repack_start = Instant::now();
1827    let block_storage = group
1828        .iter()
1829        .map(|component_ref| {
1830            dct_blocks_to_8x8_f64(
1831                &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index]
1832                    .dequantized_blocks,
1833            )
1834        })
1835        .collect::<Vec<_>>();
1836    timings.jpeg_dct_repack_us = timings
1837        .jpeg_dct_repack_us
1838        .saturating_add(repack_start.elapsed().as_micros());
1839
1840    let jobs = group
1841        .iter()
1842        .zip(block_storage.iter())
1843        .map(|(component_ref, blocks)| {
1844            let component =
1845                &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index];
1846            validate_component_block_grid(component)?;
1847            Ok(DctGridToDwt97Job {
1848                blocks,
1849                block_cols: component.block_cols as usize,
1850                block_rows: component.block_rows as usize,
1851                width: component.width as usize,
1852                height: component.height as usize,
1853            })
1854        })
1855        .collect::<Result<Vec<_>, JpegToHtj2kError>>()?;
1856
1857    record_batch_attempt(timings, group.len());
1858    let accelerator_start = Instant::now();
1859    let accelerated_first_levels = accelerator
1860        .dct_grid_to_dwt97_batch(&jobs)
1861        .map_err(JpegToHtj2kError::Accelerator)?;
1862    if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
1863        add_dwt97_batch_stage_timings(timings, stage_timings);
1864    }
1865    timings.dct_to_wavelet_accelerator_us = timings
1866        .dct_to_wavelet_accelerator_us
1867        .saturating_add(accelerator_start.elapsed().as_micros());
1868
1869    if let Some(first_levels) = accelerated_first_levels {
1870        if first_levels.len() != group.len() {
1871            return Err(JpegToHtj2kError::Validation(
1872                "9/7 batch accelerator returned wrong component count",
1873            ));
1874        }
1875        timings.component_count = timings.component_count.saturating_add(group.len());
1876        record_accelerator_dispatch(timings, group.len());
1877        let decompose_start = Instant::now();
1878        let wavelets = first_levels
1879            .into_par_iter()
1880            .zip(group.par_iter().copied())
1881            .map(|(first_level, component_ref)| {
1882                decompose_97_from_first_level(
1883                    first_level,
1884                    usize::from(tiles[component_ref.tile_index].decomposition_levels),
1885                )
1886            })
1887            .collect::<Vec<_>>();
1888        timings.dwt_decompose_us = timings
1889            .dwt_decompose_us
1890            .saturating_add(decompose_start.elapsed().as_micros());
1891        return Ok(wavelets);
1892    }
1893
1894    group
1895        .iter()
1896        .map(|component_ref| {
1897            float_direct_97_wavelet_from_component(
1898                &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index],
1899                tiles[component_ref.tile_index].decomposition_levels,
1900                scratch,
1901                accelerator,
1902                timings,
1903            )
1904        })
1905        .collect()
1906}
1907
1908fn add_dwt97_batch_stage_timings(
1909    timings: &mut TranscodeTimingReport,
1910    stage_timings: Dwt97BatchStageTimings,
1911) {
1912    timings.dwt97_batch_pack_upload_us = timings
1913        .dwt97_batch_pack_upload_us
1914        .saturating_add(stage_timings.pack_upload_us);
1915    timings.dwt97_batch_pack_upload_transfers = timings
1916        .dwt97_batch_pack_upload_transfers
1917        .saturating_add(stage_timings.pack_upload_transfers);
1918    timings.dwt97_batch_pack_upload_bytes = timings
1919        .dwt97_batch_pack_upload_bytes
1920        .saturating_add(stage_timings.pack_upload_bytes);
1921    timings.dwt97_batch_resident_dct_handoff_count = timings
1922        .dwt97_batch_resident_dct_handoff_count
1923        .saturating_add(stage_timings.resident_dct_handoff_count);
1924    timings.dwt97_batch_idct_row_lift_us = timings
1925        .dwt97_batch_idct_row_lift_us
1926        .saturating_add(stage_timings.idct_row_lift_us);
1927    timings.dwt97_batch_column_lift_us = timings
1928        .dwt97_batch_column_lift_us
1929        .saturating_add(stage_timings.column_lift_us);
1930    timings.dwt97_batch_resident_dwt_handoff_count = timings
1931        .dwt97_batch_resident_dwt_handoff_count
1932        .saturating_add(stage_timings.resident_dwt_handoff_count);
1933    timings.dwt97_batch_quantize_codeblock_us = timings
1934        .dwt97_batch_quantize_codeblock_us
1935        .saturating_add(stage_timings.quantize_codeblock_us);
1936    timings.dwt97_batch_ht_encode_us = timings
1937        .dwt97_batch_ht_encode_us
1938        .saturating_add(stage_timings.ht_encode_us);
1939    timings.dwt97_batch_ht_kernel_us = timings
1940        .dwt97_batch_ht_kernel_us
1941        .saturating_add(stage_timings.ht_kernel_us);
1942    timings.dwt97_batch_ht_status_readback_us = timings
1943        .dwt97_batch_ht_status_readback_us
1944        .saturating_add(stage_timings.ht_status_readback_us);
1945    timings.dwt97_batch_ht_status_readback_transfers = timings
1946        .dwt97_batch_ht_status_readback_transfers
1947        .saturating_add(stage_timings.ht_status_readback_transfers);
1948    timings.dwt97_batch_ht_status_readback_bytes = timings
1949        .dwt97_batch_ht_status_readback_bytes
1950        .saturating_add(stage_timings.ht_status_readback_bytes);
1951    timings.dwt97_batch_ht_compact_us = timings
1952        .dwt97_batch_ht_compact_us
1953        .saturating_add(stage_timings.ht_compact_us);
1954    timings.dwt97_batch_ht_output_readback_us = timings
1955        .dwt97_batch_ht_output_readback_us
1956        .saturating_add(stage_timings.ht_output_readback_us);
1957    timings.dwt97_batch_ht_output_readback_transfers = timings
1958        .dwt97_batch_ht_output_readback_transfers
1959        .saturating_add(stage_timings.ht_output_readback_transfers);
1960    timings.dwt97_batch_ht_output_readback_bytes = timings
1961        .dwt97_batch_ht_output_readback_bytes
1962        .saturating_add(stage_timings.ht_output_readback_bytes);
1963    timings.dwt97_batch_ht_codeblock_dispatches = timings
1964        .dwt97_batch_ht_codeblock_dispatches
1965        .saturating_add(stage_timings.ht_codeblock_dispatches);
1966    timings.dwt97_batch_readback_us = timings
1967        .dwt97_batch_readback_us
1968        .saturating_add(stage_timings.readback_us);
1969    timings.dwt97_batch_readback_transfers = timings
1970        .dwt97_batch_readback_transfers
1971        .saturating_add(stage_timings.readback_transfers);
1972    timings.dwt97_batch_readback_bytes = timings
1973        .dwt97_batch_readback_bytes
1974        .saturating_add(stage_timings.readback_bytes);
1975}
1976
1977fn record_accelerator_attempt(timings: &mut TranscodeTimingReport, job_count: usize) {
1978    timings.accelerator_attempts = timings.accelerator_attempts.saturating_add(1);
1979    timings.accelerator_jobs = timings.accelerator_jobs.saturating_add(job_count);
1980}
1981
1982fn record_accelerator_dispatch(timings: &mut TranscodeTimingReport, job_count: usize) {
1983    timings.accelerator_dispatches = timings.accelerator_dispatches.saturating_add(1);
1984    timings.accelerator_dispatched_jobs = timings
1985        .accelerator_dispatched_jobs
1986        .saturating_add(job_count);
1987}
1988
1989fn record_batch_attempt(timings: &mut TranscodeTimingReport, job_count: usize) {
1990    timings.batch_count = timings.batch_count.saturating_add(1);
1991    timings.batch_jobs = timings.batch_jobs.saturating_add(job_count);
1992    record_accelerator_attempt(timings, job_count);
1993}
1994
1995fn record_batch_dispatch(timings: &mut TranscodeTimingReport, job_count: usize) {
1996    timings.batch_count = timings.batch_count.saturating_add(1);
1997    timings.batch_jobs = timings.batch_jobs.saturating_add(job_count);
1998    record_accelerator_dispatch(timings, job_count);
1999}
2000
2001fn record_cpu_fallback(timings: &mut TranscodeTimingReport, job_count: usize) {
2002    timings.cpu_fallback_jobs = timings.cpu_fallback_jobs.saturating_add(job_count);
2003}
2004
2005fn store_integer_batch_wavelet(
2006    component_ref: BatchComponentRef,
2007    wavelet: &IntegerWavelet,
2008    tiles: &mut [IntegerBatchTile],
2009    options: &JpegToHtj2kOptions,
2010    scratch: &mut JpegToHtj2kScratch,
2011) -> Result<(), JpegToHtj2kError> {
2012    let tile = &mut tiles[component_ref.tile_index];
2013    let component = &tile.jpeg.components[component_ref.component_index];
2014    let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
2015    let actual_coefficients = flatten_integer_wavelet(wavelet);
2016    tile.precomputed_components[component_ref.component_index] =
2017        Some(PrecomputedHtj2k53Component {
2018            x_rsiz,
2019            y_rsiz,
2020            dwt: j2k_dwt_from_integer_wavelet(wavelet),
2021        });
2022
2023    if options.validate_against_float_reference {
2024        tile.float_validation_actual
2025            .extend(actual_coefficients.clone());
2026        tile.float_validation_expected
2027            .extend(float_reference_coefficients(
2028                component,
2029                tile.decomposition_levels,
2030                scratch,
2031            )?);
2032    }
2033    if options.validate_against_integer_reference {
2034        tile.integer_validation_actual.extend(actual_coefficients);
2035        tile.integer_validation_expected
2036            .extend(integer_reference_coefficients(
2037                component,
2038                tile.decomposition_levels,
2039            )?);
2040    }
2041
2042    Ok(())
2043}
2044
2045fn store_float97_batch_wavelet(
2046    component_ref: BatchComponentRef,
2047    wavelet: &ComponentWavelet97,
2048    tiles: &mut [Float97BatchTile],
2049    options: &JpegToHtj2kOptions,
2050    scratch: &mut JpegToHtj2kScratch,
2051) -> Result<(), JpegToHtj2kError> {
2052    let tile = &mut tiles[component_ref.tile_index];
2053    let component = &tile.jpeg.components[component_ref.component_index];
2054    let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
2055    tile.precomputed_components[component_ref.component_index] =
2056        Some(PrecomputedHtj2k97Component {
2057            x_rsiz,
2058            y_rsiz,
2059            dwt: j2k_dwt97_from_wavelet(
2060                wavelet,
2061                component.width as usize,
2062                component.height as usize,
2063            ),
2064        });
2065
2066    if options.validate_against_float_reference {
2067        let actual_coefficients = rounded_wavelet97_i32(wavelet)?;
2068        tile.float_validation_actual.extend(actual_coefficients);
2069        tile.float_validation_expected
2070            .extend(float97_reference_coefficients(
2071                component,
2072                tile.decomposition_levels,
2073                scratch,
2074            )?);
2075    }
2076
2077    Ok(())
2078}
2079
2080fn record_encode_dispatch_delta(
2081    timings: &mut TranscodeTimingReport,
2082    before: J2kEncodeDispatchReport,
2083    after: J2kEncodeDispatchReport,
2084) {
2085    let delta = after.saturating_delta(before);
2086    timings.htj2k_encode_accelerator_dispatches = timings
2087        .htj2k_encode_accelerator_dispatches
2088        .saturating_add(delta.total());
2089    timings.htj2k_encode_ht_code_block_dispatches = timings
2090        .htj2k_encode_ht_code_block_dispatches
2091        .saturating_add(delta.ht_code_block);
2092    timings.htj2k_encode_packetization_dispatches = timings
2093        .htj2k_encode_packetization_dispatches
2094        .saturating_add(delta.packetization);
2095}
2096
2097fn add_encode_timing_counters_from_result(
2098    timings: &mut TranscodeTimingReport,
2099    tile: &Result<EncodedTranscode, JpegToHtj2kError>,
2100) {
2101    let Ok(tile) = tile else {
2102        return;
2103    };
2104    timings.htj2k_encode_accelerator_dispatches = timings
2105        .htj2k_encode_accelerator_dispatches
2106        .saturating_add(tile.report.timings.htj2k_encode_accelerator_dispatches);
2107    timings.htj2k_encode_ht_code_block_dispatches = timings
2108        .htj2k_encode_ht_code_block_dispatches
2109        .saturating_add(tile.report.timings.htj2k_encode_ht_code_block_dispatches);
2110    timings.htj2k_encode_packetization_dispatches = timings
2111        .htj2k_encode_packetization_dispatches
2112        .saturating_add(tile.report.timings.htj2k_encode_packetization_dispatches);
2113}
2114
2115fn encode_integer_prepared_tiles<E: J2kEncodeStageAccelerator>(
2116    prepared_tiles: Vec<IntegerBatchTile>,
2117    options: &JpegToHtj2kOptions,
2118    encode_accelerator: &mut E,
2119) -> Vec<(usize, Result<EncodedTranscode, JpegToHtj2kError>)> {
2120    if encode_accelerator.prefer_parallel_cpu_tile_encode() {
2121        return prepared_tiles
2122            .into_par_iter()
2123            .map(|prepared| {
2124                let tile_index = prepared.tile_index;
2125                let mut cpu_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
2126                (
2127                    tile_index,
2128                    encode_integer_batch_tile(prepared, options, &mut cpu_accelerator),
2129                )
2130            })
2131            .collect();
2132    }
2133
2134    prepared_tiles
2135        .into_iter()
2136        .map(|prepared| {
2137            let tile_index = prepared.tile_index;
2138            (
2139                tile_index,
2140                encode_integer_batch_tile(prepared, options, encode_accelerator),
2141            )
2142        })
2143        .collect()
2144}
2145
2146fn encode_float97_prepared_tiles<E: J2kEncodeStageAccelerator>(
2147    prepared_tiles: Vec<Float97BatchTile>,
2148    options: &JpegToHtj2kOptions,
2149    encode_accelerator: &mut E,
2150) -> Vec<(usize, Result<EncodedTranscode, JpegToHtj2kError>)> {
2151    if !encode_accelerator.prefer_parallel_cpu_tile_encode()
2152        && can_encode_float97_precomputed_tiles_batch(&prepared_tiles, options)
2153    {
2154        return encode_float97_precomputed_tiles_batch(prepared_tiles, options, encode_accelerator);
2155    }
2156
2157    if encode_accelerator.prefer_parallel_cpu_tile_encode() {
2158        return prepared_tiles
2159            .into_par_iter()
2160            .map(|prepared| {
2161                let tile_index = prepared.tile_index;
2162                let mut cpu_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
2163                (
2164                    tile_index,
2165                    encode_float97_batch_tile(prepared, options, &mut cpu_accelerator),
2166                )
2167            })
2168            .collect();
2169    }
2170
2171    prepared_tiles
2172        .into_iter()
2173        .map(|prepared| {
2174            let tile_index = prepared.tile_index;
2175            (
2176                tile_index,
2177                encode_float97_batch_tile(prepared, options, encode_accelerator),
2178            )
2179        })
2180        .collect()
2181}
2182
2183fn can_encode_float97_precomputed_tiles_batch(
2184    prepared_tiles: &[Float97BatchTile],
2185    options: &JpegToHtj2kOptions,
2186) -> bool {
2187    options.encode_options.num_layers == 1
2188        && prepared_tiles.iter().all(|tile| {
2189            tile.precomputed_components.iter().all(Option::is_some)
2190                && tile.preencoded_compact_payload.is_empty()
2191                && tile
2192                    .preencoded_compact_components
2193                    .iter()
2194                    .all(Option::is_none)
2195                && tile.preencoded_components.iter().all(Option::is_none)
2196                && tile.prequantized_components.iter().all(Option::is_none)
2197        })
2198}
2199
2200#[allow(clippy::too_many_lines)]
2201fn encode_float97_precomputed_tiles_batch<E: J2kEncodeStageAccelerator>(
2202    prepared_tiles: Vec<Float97BatchTile>,
2203    options: &JpegToHtj2kOptions,
2204    encode_accelerator: &mut E,
2205) -> Vec<(usize, Result<EncodedTranscode, JpegToHtj2kError>)> {
2206    let mut records = Vec::with_capacity(prepared_tiles.len());
2207    let mut images = Vec::with_capacity(prepared_tiles.len());
2208
2209    for tile in prepared_tiles {
2210        let Float97BatchTile {
2211            tile_index,
2212            jpeg,
2213            decomposition_levels,
2214            all_unit_sampled,
2215            component_reports,
2216            precomputed_components,
2217            preencoded_compact_payload: _,
2218            preencoded_compact_components: _,
2219            preencoded_components: _,
2220            prequantized_components: _,
2221            float_validation_actual,
2222            float_validation_expected,
2223            timings,
2224            ..
2225        } = tile;
2226        let components = match precomputed_components
2227            .into_iter()
2228            .map(|component| {
2229                component.ok_or(JpegToHtj2kError::Validation(
2230                    "9/7 precomputed batch transcode did not produce all components",
2231                ))
2232            })
2233            .collect::<Result<Vec<_>, _>>()
2234        {
2235            Ok(components) => components,
2236            Err(error) => return vec![(tile_index, Err(error))],
2237        };
2238        images.push(PrecomputedHtj2k97Image {
2239            width: jpeg.width,
2240            height: jpeg.height,
2241            bit_depth: 8,
2242            signed: false,
2243            components,
2244        });
2245        records.push(Float97PrecomputedBatchRecord {
2246            tile_index,
2247            jpeg,
2248            decomposition_levels,
2249            all_unit_sampled,
2250            component_reports,
2251            float_validation_actual,
2252            float_validation_expected,
2253            timings,
2254        });
2255    }
2256
2257    let encode_start = Instant::now();
2258    let encode_dispatch_before = encode_accelerator.dispatch_report();
2259    let native_images = images;
2260    let codestreams = {
2261        let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2262        let native_encode_options = options.encode_options.to_native();
2263        match encode_precomputed_htj2k_97_batch_with_accelerator(
2264            &native_images,
2265            &native_encode_options,
2266            &mut native_encode_accelerator,
2267        ) {
2268            Ok(codestreams) => codestreams,
2269            Err(error) => {
2270                return records
2271                    .into_iter()
2272                    .map(|record| (record.tile_index, Err(JpegToHtj2kError::Encode(error))))
2273                    .collect();
2274            }
2275        }
2276    };
2277    let encode_dispatch_after = encode_accelerator.dispatch_report();
2278    let encode_us = encode_start.elapsed().as_micros();
2279
2280    if codestreams.len() != records.len() {
2281        return records
2282            .into_iter()
2283            .map(|record| {
2284                (
2285                    record.tile_index,
2286                    Err(JpegToHtj2kError::Validation(
2287                        "9/7 precomputed batch encode returned the wrong tile count",
2288                    )),
2289                )
2290            })
2291            .collect();
2292    }
2293
2294    records
2295        .into_iter()
2296        .zip(codestreams)
2297        .enumerate()
2298        .map(|(batch_index, (record, codestream))| {
2299            let encode_measurement = (batch_index == 0).then_some((
2300                encode_dispatch_before,
2301                encode_dispatch_after,
2302                encode_us,
2303            ));
2304            (
2305                record.tile_index,
2306                encoded_float97_precomputed_batch_record(
2307                    record,
2308                    codestream,
2309                    options,
2310                    encode_measurement,
2311                ),
2312            )
2313        })
2314        .collect()
2315}
2316
2317fn encoded_float97_precomputed_batch_record(
2318    record: Float97PrecomputedBatchRecord,
2319    codestream: Vec<u8>,
2320    options: &JpegToHtj2kOptions,
2321    encode_measurement: Option<(J2kEncodeDispatchReport, J2kEncodeDispatchReport, u128)>,
2322) -> Result<EncodedTranscode, JpegToHtj2kError> {
2323    let Float97PrecomputedBatchRecord {
2324        jpeg,
2325        decomposition_levels,
2326        all_unit_sampled,
2327        component_reports,
2328        float_validation_actual,
2329        float_validation_expected,
2330        mut timings,
2331        ..
2332    } = record;
2333
2334    if let Some((encode_dispatch_before, encode_dispatch_after, encode_us)) = encode_measurement {
2335        record_encode_dispatch_delta(&mut timings, encode_dispatch_before, encode_dispatch_after);
2336        timings.htj2k_encode_us = encode_us;
2337    }
2338    let encode_us = timings.htj2k_encode_us;
2339    let float_reference_metrics = if options.validate_against_float_reference {
2340        Some(error_metrics_i32(
2341            &float_validation_actual,
2342            &float_validation_expected,
2343        )?)
2344    } else {
2345        None
2346    };
2347
2348    Ok(EncodedTranscode {
2349        codestream,
2350        report: TranscodeReport {
2351            width: jpeg.width,
2352            height: jpeg.height,
2353            component_count: jpeg.components.len(),
2354            components: component_reports,
2355            float_reference_classification: float_reference_metrics
2356                .as_ref()
2357                .map(TranscodeValidationClassification::classify_metrics),
2358            float_reference_metrics,
2359            integer_reference_classification: None,
2360            integer_reference_metrics: None,
2361            decomposition_levels,
2362            coefficient_path: options.coefficient_path,
2363            path: transcode_path_name(all_unit_sampled, options.coefficient_path),
2364            extract_us: timings.jpeg_dct_extract_us,
2365            transform_us: 0,
2366            encode_us,
2367            timings,
2368        },
2369    })
2370}
2371
2372fn encode_integer_batch_tile<E: J2kEncodeStageAccelerator>(
2373    tile: IntegerBatchTile,
2374    options: &JpegToHtj2kOptions,
2375    encode_accelerator: &mut E,
2376) -> Result<EncodedTranscode, JpegToHtj2kError> {
2377    let mut timings = tile.timings;
2378    let components = tile
2379        .precomputed_components
2380        .into_iter()
2381        .map(|component| {
2382            component.ok_or(JpegToHtj2kError::Validation(
2383                "integer batch transcode did not produce all components",
2384            ))
2385        })
2386        .collect::<Result<Vec<_>, _>>()?;
2387    let encode_start = Instant::now();
2388    let precomputed = PrecomputedHtj2k53Image {
2389        width: tile.jpeg.width,
2390        height: tile.jpeg.height,
2391        bit_depth: 8,
2392        signed: false,
2393        components,
2394    };
2395    let encode_dispatch_before = encode_accelerator.dispatch_report();
2396    let native_precomputed = precomputed;
2397    let codestream = {
2398        let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2399        let native_encode_options = options.encode_options.to_native();
2400        encode_precomputed_htj2k_53_with_accelerator(
2401            &native_precomputed,
2402            &native_encode_options,
2403            &mut native_encode_accelerator,
2404        )
2405        .map_err(JpegToHtj2kError::Encode)?
2406    };
2407    record_encode_dispatch_delta(
2408        &mut timings,
2409        encode_dispatch_before,
2410        encode_accelerator.dispatch_report(),
2411    );
2412    let encode_us = encode_start.elapsed().as_micros();
2413    timings.htj2k_encode_us = encode_us;
2414    let integer_reference_metrics = if options.validate_against_integer_reference {
2415        Some(error_metrics_i32(
2416            &tile.integer_validation_actual,
2417            &tile.integer_validation_expected,
2418        )?)
2419    } else {
2420        None
2421    };
2422    let float_reference_metrics = if options.validate_against_float_reference {
2423        Some(error_metrics_i32(
2424            &tile.float_validation_actual,
2425            &tile.float_validation_expected,
2426        )?)
2427    } else {
2428        None
2429    };
2430
2431    Ok(EncodedTranscode {
2432        codestream,
2433        report: TranscodeReport {
2434            width: tile.jpeg.width,
2435            height: tile.jpeg.height,
2436            component_count: tile.jpeg.components.len(),
2437            components: tile.component_reports,
2438            float_reference_classification: float_reference_metrics
2439                .as_ref()
2440                .map(TranscodeValidationClassification::classify_metrics),
2441            float_reference_metrics,
2442            integer_reference_classification: integer_reference_metrics
2443                .as_ref()
2444                .map(TranscodeValidationClassification::classify_metrics),
2445            integer_reference_metrics,
2446            decomposition_levels: tile.decomposition_levels,
2447            coefficient_path: options.coefficient_path,
2448            path: transcode_path_name(tile.all_unit_sampled, options.coefficient_path),
2449            extract_us: timings.jpeg_dct_extract_us,
2450            transform_us: 0,
2451            encode_us,
2452            timings,
2453        },
2454    })
2455}
2456
2457#[allow(clippy::too_many_lines)]
2458fn encode_float97_batch_tile<E: J2kEncodeStageAccelerator>(
2459    tile: Float97BatchTile,
2460    options: &JpegToHtj2kOptions,
2461    encode_accelerator: &mut E,
2462) -> Result<EncodedTranscode, JpegToHtj2kError> {
2463    let Float97BatchTile {
2464        jpeg,
2465        decomposition_levels,
2466        all_unit_sampled,
2467        component_reports,
2468        precomputed_components,
2469        preencoded_compact_payload,
2470        preencoded_compact_components,
2471        preencoded_components,
2472        prequantized_components,
2473        float_validation_actual,
2474        float_validation_expected,
2475        mut timings,
2476        ..
2477    } = tile;
2478
2479    let encode_start = Instant::now();
2480    let encode_dispatch_before = encode_accelerator.dispatch_report();
2481    let codestream = {
2482        let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2483        let native_encode_options = options.encode_options.to_native();
2484        if preencoded_compact_components.iter().any(Option::is_some) {
2485            let components = preencoded_compact_components
2486                .into_iter()
2487                .map(|component| {
2488                    component.ok_or(JpegToHtj2kError::Validation(
2489                        "9/7 compact preencoded batch transcode did not produce all components",
2490                    ))
2491                })
2492                .collect::<Result<Vec<_>, _>>()?;
2493            let preencoded = PreencodedHtj2k97CompactImage {
2494                width: jpeg.width,
2495                height: jpeg.height,
2496                bit_depth: 8,
2497                signed: false,
2498                payload: preencoded_compact_payload,
2499                components,
2500            };
2501            encode_preencoded_htj2k_97_compact_owned_with_accelerator(
2502                preencoded,
2503                &native_encode_options,
2504                &mut native_encode_accelerator,
2505            )
2506            .map_err(JpegToHtj2kError::Encode)?
2507        } else if preencoded_components.iter().any(Option::is_some) {
2508            let components = preencoded_components
2509                .into_iter()
2510                .map(|component| {
2511                    component.ok_or(JpegToHtj2kError::Validation(
2512                        "9/7 preencoded batch transcode did not produce all components",
2513                    ))
2514                })
2515                .collect::<Result<Vec<_>, _>>()?;
2516            let preencoded = PreencodedHtj2k97Image {
2517                width: jpeg.width,
2518                height: jpeg.height,
2519                bit_depth: 8,
2520                signed: false,
2521                components,
2522            };
2523            encode_preencoded_htj2k_97_owned_with_accelerator(
2524                preencoded,
2525                &native_encode_options,
2526                &mut native_encode_accelerator,
2527            )
2528            .map_err(JpegToHtj2kError::Encode)?
2529        } else if prequantized_components.iter().any(Option::is_some) {
2530            let components = prequantized_components
2531                .into_iter()
2532                .map(|component| {
2533                    component.ok_or(JpegToHtj2kError::Validation(
2534                        "9/7 code-block batch transcode did not produce all components",
2535                    ))
2536                })
2537                .collect::<Result<Vec<_>, _>>()?;
2538            let prequantized = PrequantizedHtj2k97Image {
2539                width: jpeg.width,
2540                height: jpeg.height,
2541                bit_depth: 8,
2542                signed: false,
2543                components,
2544            };
2545            let native_prequantized = prequantized;
2546            encode_prequantized_htj2k_97_with_accelerator(
2547                &native_prequantized,
2548                &native_encode_options,
2549                &mut native_encode_accelerator,
2550            )
2551            .map_err(JpegToHtj2kError::Encode)?
2552        } else {
2553            let components = precomputed_components
2554                .into_iter()
2555                .map(|component| {
2556                    component.ok_or(JpegToHtj2kError::Validation(
2557                        "9/7 batch transcode did not produce all components",
2558                    ))
2559                })
2560                .collect::<Result<Vec<_>, _>>()?;
2561            let precomputed = PrecomputedHtj2k97Image {
2562                width: jpeg.width,
2563                height: jpeg.height,
2564                bit_depth: 8,
2565                signed: false,
2566                components,
2567            };
2568            let native_precomputed = precomputed;
2569            encode_precomputed_htj2k_97_with_accelerator(
2570                &native_precomputed,
2571                &native_encode_options,
2572                &mut native_encode_accelerator,
2573            )
2574            .map_err(JpegToHtj2kError::Encode)?
2575        }
2576    };
2577    record_encode_dispatch_delta(
2578        &mut timings,
2579        encode_dispatch_before,
2580        encode_accelerator.dispatch_report(),
2581    );
2582    let encode_us = encode_start.elapsed().as_micros();
2583    timings.htj2k_encode_us = encode_us;
2584    let float_reference_metrics = if options.validate_against_float_reference {
2585        Some(error_metrics_i32(
2586            &float_validation_actual,
2587            &float_validation_expected,
2588        )?)
2589    } else {
2590        None
2591    };
2592
2593    Ok(EncodedTranscode {
2594        codestream,
2595        report: TranscodeReport {
2596            width: jpeg.width,
2597            height: jpeg.height,
2598            component_count: jpeg.components.len(),
2599            components: component_reports,
2600            float_reference_classification: float_reference_metrics
2601                .as_ref()
2602                .map(TranscodeValidationClassification::classify_metrics),
2603            float_reference_metrics,
2604            integer_reference_classification: None,
2605            integer_reference_metrics: None,
2606            decomposition_levels,
2607            coefficient_path: options.coefficient_path,
2608            path: transcode_path_name(all_unit_sampled, options.coefficient_path),
2609            extract_us: timings.jpeg_dct_extract_us,
2610            transform_us: 0,
2611            encode_us,
2612            timings,
2613        },
2614    })
2615}
2616
2617#[allow(clippy::too_many_lines)]
2618fn jpeg_to_htj2k_with_scratch<A: DctToWaveletStageAccelerator, E: J2kEncodeStageAccelerator>(
2619    bytes: &[u8],
2620    options: &JpegToHtj2kOptions,
2621    scratch: &mut JpegToHtj2kScratch,
2622    accelerator: &mut A,
2623    encode_accelerator: &mut E,
2624) -> Result<EncodedTranscode, JpegToHtj2kError> {
2625    validate_transcode_options(options)?;
2626    let mut timings = TranscodeTimingReport {
2627        tile_count: 1,
2628        ..TranscodeTimingReport::default()
2629    };
2630
2631    let extract_start = Instant::now();
2632    let jpeg = extract_dct_blocks(bytes, DctExtractOptions::default())?;
2633    let extract_us = extract_start.elapsed().as_micros();
2634    timings.jpeg_dct_extract_us = extract_us;
2635
2636    if jpeg.components.is_empty() || jpeg.components.len() > 4 {
2637        return Err(JpegToHtj2kError::Unsupported(
2638            "unsupported JPEG component count for jpeg_to_htj2k",
2639        ));
2640    }
2641    let component_sampling =
2642        component_sampling_for_jpeg(&jpeg.components, jpeg.width, jpeg.height)?;
2643    let decomposition_levels = decomposition_levels_for_components(
2644        &jpeg.components,
2645        options.encode_options.num_decomposition_levels,
2646    )?;
2647    let all_unit_sampled = component_sampling
2648        .iter()
2649        .all(|&(x_rsiz, y_rsiz)| x_rsiz == 1 && y_rsiz == 1);
2650    let component_reports = jpeg
2651        .components
2652        .iter()
2653        .zip(component_sampling.iter().copied())
2654        .map(|(component, (x_rsiz, y_rsiz))| TranscodeComponentReport {
2655            component_index: component.component_index,
2656            width: component.width,
2657            height: component.height,
2658            block_cols: component.block_cols,
2659            block_rows: component.block_rows,
2660            x_rsiz,
2661            y_rsiz,
2662        })
2663        .collect();
2664
2665    let transform_start = Instant::now();
2666    let component_batch = transcode_component_batch(
2667        &jpeg.components,
2668        &component_sampling,
2669        decomposition_levels,
2670        options,
2671        scratch,
2672        accelerator,
2673        &mut timings,
2674    )?;
2675    let transform_us = transform_start.elapsed().as_micros();
2676    timings.dct_to_wavelet_total_us = transform_us;
2677
2678    let encode_start = Instant::now();
2679    let encode_dispatch_before = encode_accelerator.dispatch_report();
2680    let native_encode_options = options.encode_options.to_native();
2681    let codestream = match component_batch.precomputed_components {
2682        PrecomputedComponentBatch::Dwt53(components) => {
2683            let precomputed = PrecomputedHtj2k53Image {
2684                width: jpeg.width,
2685                height: jpeg.height,
2686                bit_depth: 8,
2687                signed: false,
2688                components,
2689            };
2690            let native_precomputed = precomputed;
2691            let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2692            encode_precomputed_htj2k_53_with_accelerator(
2693                &native_precomputed,
2694                &native_encode_options,
2695                &mut native_encode_accelerator,
2696            )
2697            .map_err(JpegToHtj2kError::Encode)?
2698        }
2699        PrecomputedComponentBatch::Dwt97(components) => {
2700            let precomputed = PrecomputedHtj2k97Image {
2701                width: jpeg.width,
2702                height: jpeg.height,
2703                bit_depth: 8,
2704                signed: false,
2705                components,
2706            };
2707            let native_precomputed = precomputed;
2708            let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2709            encode_precomputed_htj2k_97_with_accelerator(
2710                &native_precomputed,
2711                &native_encode_options,
2712                &mut native_encode_accelerator,
2713            )
2714            .map_err(JpegToHtj2kError::Encode)?
2715        }
2716    };
2717    record_encode_dispatch_delta(
2718        &mut timings,
2719        encode_dispatch_before,
2720        encode_accelerator.dispatch_report(),
2721    );
2722    let encode_us = encode_start.elapsed().as_micros();
2723    timings.htj2k_encode_us = encode_us;
2724
2725    Ok(EncodedTranscode {
2726        codestream,
2727        report: TranscodeReport {
2728            width: jpeg.width,
2729            height: jpeg.height,
2730            component_count: jpeg.components.len(),
2731            components: component_reports,
2732            float_reference_classification: component_batch
2733                .float_reference_metrics
2734                .as_ref()
2735                .map(TranscodeValidationClassification::classify_metrics),
2736            float_reference_metrics: component_batch.float_reference_metrics,
2737            integer_reference_classification: component_batch
2738                .integer_reference_metrics
2739                .as_ref()
2740                .map(TranscodeValidationClassification::classify_metrics),
2741            integer_reference_metrics: component_batch.integer_reference_metrics,
2742            decomposition_levels,
2743            coefficient_path: options.coefficient_path,
2744            path: transcode_path_name(all_unit_sampled, options.coefficient_path),
2745            extract_us,
2746            transform_us,
2747            encode_us,
2748            timings,
2749        },
2750    })
2751}
2752
2753fn validate_transcode_options(options: &JpegToHtj2kOptions) -> Result<(), JpegToHtj2kError> {
2754    if !options.encode_options.use_ht_block_coding {
2755        return Err(JpegToHtj2kError::Unsupported(
2756            "jpeg_to_htj2k requires HT block coding",
2757        ));
2758    }
2759    if options.encode_options.use_mct {
2760        return Err(JpegToHtj2kError::Unsupported(
2761            "jpeg_to_htj2k requires use_mct=false because JPEG components stay in native color space",
2762        ));
2763    }
2764
2765    match (options.coefficient_path, options.encode_options.reversible) {
2766        (
2767            JpegToHtj2kCoefficientPath::IntegerDirect53
2768            | JpegToHtj2kCoefficientPath::FloatDirectLinear53,
2769            true,
2770        )
2771        | (JpegToHtj2kCoefficientPath::FloatDirectLinear97, false) => Ok(()),
2772        (
2773            JpegToHtj2kCoefficientPath::IntegerDirect53
2774            | JpegToHtj2kCoefficientPath::FloatDirectLinear53,
2775            false,
2776        ) => Err(JpegToHtj2kError::Unsupported(
2777            "5/3 coefficient path requires reversible HTJ2K encode",
2778        )),
2779        (JpegToHtj2kCoefficientPath::FloatDirectLinear97, true) => {
2780            Err(JpegToHtj2kError::Unsupported(
2781                "9/7 coefficient path requires irreversible HTJ2K encode",
2782            ))
2783        }
2784    }
2785}
2786
2787struct ComponentTranscodeBatch {
2788    precomputed_components: PrecomputedComponentBatch,
2789    float_reference_metrics: Option<TranscodeValidationMetrics>,
2790    integer_reference_metrics: Option<TranscodeValidationMetrics>,
2791}
2792
2793enum PrecomputedComponentBatch {
2794    Dwt53(Vec<PrecomputedHtj2k53Component>),
2795    Dwt97(Vec<PrecomputedHtj2k97Component>),
2796}
2797
2798struct ComponentTranscodeResult {
2799    precomputed: PrecomputedComponent,
2800    float_validation_coefficients: Option<(Vec<i32>, Vec<i32>)>,
2801    integer_validation_coefficients: Option<(Vec<i32>, Vec<i32>)>,
2802}
2803
2804enum PrecomputedComponent {
2805    Dwt53(PrecomputedHtj2k53Component),
2806    Dwt97(PrecomputedHtj2k97Component),
2807}
2808
2809struct ComponentWavelet {
2810    final_ll: Vec<f64>,
2811    final_ll_width: usize,
2812    final_ll_height: usize,
2813    levels: Vec<Dwt53TwoDimensional<f64>>,
2814}
2815
2816struct ComponentWavelet97 {
2817    final_ll: Vec<f64>,
2818    final_ll_width: usize,
2819    final_ll_height: usize,
2820    levels: Vec<Dwt97TwoDimensional<f64>>,
2821}
2822
/// One level of an integer wavelet decomposition: its dimensions and the three
/// detail bands.
///
/// NOTE(review): presumably `width`/`height` are the extents of the level's
/// input and `low_*`/`high_*` the sizes of its low-pass and high-pass halves;
/// confirm against the code that builds these levels.
struct IntegerWaveletLevel {
    /// Width associated with this level.
    width: usize,
    /// Height associated with this level.
    height: usize,
    /// Low-pass width.
    low_width: usize,
    /// Low-pass height.
    low_height: usize,
    /// High-pass width.
    high_width: usize,
    /// High-pass height.
    high_height: usize,
    /// HL detail band coefficients.
    hl: Vec<i32>,
    /// LH detail band coefficients.
    lh: Vec<i32>,
    /// HH detail band coefficients.
    hh: Vec<i32>,
}
2834
2835struct IntegerWavelet {
2836    final_ll: Vec<i32>,
2837    final_ll_width: usize,
2838    final_ll_height: usize,
2839    levels: Vec<IntegerWaveletLevel>,
2840}
2841
2842fn transcode_component_batch(
2843    components: &[JpegDctComponent],
2844    component_sampling: &[(u8, u8)],
2845    decomposition_levels: u8,
2846    options: &JpegToHtj2kOptions,
2847    scratch: &mut JpegToHtj2kScratch,
2848    accelerator: &mut impl DctToWaveletStageAccelerator,
2849    timings: &mut TranscodeTimingReport,
2850) -> Result<ComponentTranscodeBatch, JpegToHtj2kError> {
2851    if matches!(
2852        options.coefficient_path,
2853        JpegToHtj2kCoefficientPath::FloatDirectLinear97
2854    ) && options.validate_against_integer_reference
2855    {
2856        return Err(JpegToHtj2kError::Unsupported(
2857            "integer reversible validation is only defined for 5/3 coefficient paths",
2858        ));
2859    }
2860
2861    if matches!(
2862        options.coefficient_path,
2863        JpegToHtj2kCoefficientPath::IntegerDirect53
2864    ) {
2865        return transcode_integer_component_batch(
2866            components,
2867            component_sampling,
2868            decomposition_levels,
2869            options,
2870            scratch,
2871            accelerator,
2872            timings,
2873        );
2874    }
2875
2876    let mut precomputed_53 = Vec::with_capacity(components.len());
2877    let mut precomputed_97 = Vec::with_capacity(components.len());
2878    let mut float_validation_actual = Vec::new();
2879    let mut float_validation_expected = Vec::new();
2880    let mut integer_validation_actual = Vec::new();
2881    let mut integer_validation_expected = Vec::new();
2882
2883    for (component, (x_rsiz, y_rsiz)) in components.iter().zip(component_sampling.iter().copied()) {
2884        let component_result = component_to_precomputed_htj2k(
2885            component,
2886            x_rsiz,
2887            y_rsiz,
2888            decomposition_levels,
2889            options,
2890            scratch,
2891            accelerator,
2892            timings,
2893        )?;
2894        match component_result.precomputed {
2895            PrecomputedComponent::Dwt53(precomputed) => precomputed_53.push(precomputed),
2896            PrecomputedComponent::Dwt97(precomputed) => precomputed_97.push(precomputed),
2897        }
2898        if let Some((actual, expected)) = component_result.float_validation_coefficients {
2899            float_validation_actual.extend(actual);
2900            float_validation_expected.extend(expected);
2901        }
2902        if let Some((actual, expected)) = component_result.integer_validation_coefficients {
2903            integer_validation_actual.extend(actual);
2904            integer_validation_expected.extend(expected);
2905        }
2906    }
2907
2908    let float_reference_metrics = if options.validate_against_float_reference {
2909        Some(error_metrics_i32(
2910            &float_validation_actual,
2911            &float_validation_expected,
2912        )?)
2913    } else {
2914        None
2915    };
2916    let integer_reference_metrics = if options.validate_against_integer_reference {
2917        Some(error_metrics_i32(
2918            &integer_validation_actual,
2919            &integer_validation_expected,
2920        )?)
2921    } else {
2922        None
2923    };
2924
2925    let precomputed_components = if matches!(
2926        options.coefficient_path,
2927        JpegToHtj2kCoefficientPath::FloatDirectLinear97
2928    ) {
2929        PrecomputedComponentBatch::Dwt97(precomputed_97)
2930    } else {
2931        PrecomputedComponentBatch::Dwt53(precomputed_53)
2932    };
2933
2934    Ok(ComponentTranscodeBatch {
2935        precomputed_components,
2936        float_reference_metrics,
2937        integer_reference_metrics,
2938    })
2939}
2940
2941fn transcode_integer_component_batch(
2942    components: &[JpegDctComponent],
2943    component_sampling: &[(u8, u8)],
2944    decomposition_levels: u8,
2945    options: &JpegToHtj2kOptions,
2946    scratch: &mut JpegToHtj2kScratch,
2947    accelerator: &mut impl DctToWaveletStageAccelerator,
2948    timings: &mut TranscodeTimingReport,
2949) -> Result<ComponentTranscodeBatch, JpegToHtj2kError> {
2950    let mut precomputed_53: Vec<Option<PrecomputedHtj2k53Component>> =
2951        (0..components.len()).map(|_| None).collect();
2952    let mut float_validation_actual = Vec::new();
2953    let mut float_validation_expected = Vec::new();
2954    let mut integer_validation_actual = Vec::new();
2955    let mut integer_validation_expected = Vec::new();
2956
2957    for group in same_geometry_component_groups(components) {
2958        let group_wavelets = integer_wavelets_for_component_group(
2959            &group,
2960            components,
2961            decomposition_levels,
2962            scratch,
2963            accelerator,
2964            timings,
2965        )?;
2966        for (component_index, wavelet) in group.into_iter().zip(group_wavelets) {
2967            let component = &components[component_index];
2968            let (x_rsiz, y_rsiz) = component_sampling[component_index];
2969            let actual_coefficients = flatten_integer_wavelet(&wavelet);
2970            precomputed_53[component_index] = Some(PrecomputedHtj2k53Component {
2971                x_rsiz,
2972                y_rsiz,
2973                dwt: j2k_dwt_from_integer_wavelet(&wavelet),
2974            });
2975
2976            if options.validate_against_float_reference {
2977                float_validation_actual.extend(actual_coefficients.clone());
2978                float_validation_expected.extend(float_reference_coefficients(
2979                    component,
2980                    decomposition_levels,
2981                    scratch,
2982                )?);
2983            }
2984            if options.validate_against_integer_reference {
2985                integer_validation_actual.extend(actual_coefficients);
2986                integer_validation_expected.extend(integer_reference_coefficients(
2987                    component,
2988                    decomposition_levels,
2989                )?);
2990            }
2991        }
2992    }
2993
2994    let float_reference_metrics = if options.validate_against_float_reference {
2995        Some(error_metrics_i32(
2996            &float_validation_actual,
2997            &float_validation_expected,
2998        )?)
2999    } else {
3000        None
3001    };
3002    let integer_reference_metrics = if options.validate_against_integer_reference {
3003        Some(error_metrics_i32(
3004            &integer_validation_actual,
3005            &integer_validation_expected,
3006        )?)
3007    } else {
3008        None
3009    };
3010    let precomputed_components = precomputed_53
3011        .into_iter()
3012        .map(|component| {
3013            component.ok_or(JpegToHtj2kError::Validation(
3014                "integer transcode did not produce all components",
3015            ))
3016        })
3017        .collect::<Result<Vec<_>, _>>()?;
3018
3019    Ok(ComponentTranscodeBatch {
3020        precomputed_components: PrecomputedComponentBatch::Dwt53(precomputed_components),
3021        float_reference_metrics,
3022        integer_reference_metrics,
3023    })
3024}
3025
3026fn integer_wavelets_for_component_group(
3027    group: &[usize],
3028    components: &[JpegDctComponent],
3029    decomposition_levels: u8,
3030    scratch: &mut JpegToHtj2kScratch,
3031    accelerator: &mut impl DctToWaveletStageAccelerator,
3032    timings: &mut TranscodeTimingReport,
3033) -> Result<Vec<IntegerWavelet>, JpegToHtj2kError> {
3034    let jobs = group
3035        .iter()
3036        .map(|&component_index| integer_dct_job_for_component(&components[component_index]))
3037        .collect::<Result<Vec<_>, _>>()?;
3038    record_batch_attempt(timings, group.len());
3039    let accelerator_start = Instant::now();
3040    let accelerated_first_levels = accelerator
3041        .dct_grid_to_reversible_dwt53_batch(&jobs)
3042        .map_err(JpegToHtj2kError::Accelerator)?;
3043    timings.dct_to_wavelet_accelerator_us = timings
3044        .dct_to_wavelet_accelerator_us
3045        .saturating_add(accelerator_start.elapsed().as_micros());
3046
3047    if let Some(first_levels) = accelerated_first_levels {
3048        if first_levels.len() != group.len() {
3049            return Err(JpegToHtj2kError::Validation(
3050                "reversible 5/3 batch accelerator returned wrong component count",
3051            ));
3052        }
3053        timings.component_count = timings.component_count.saturating_add(group.len());
3054        record_accelerator_dispatch(timings, group.len());
3055        let decompose_start = Instant::now();
3056        let wavelets = first_levels
3057            .into_iter()
3058            .map(|first_level| integer_wavelet_from_first_level(first_level, decomposition_levels))
3059            .collect();
3060        timings.dwt_decompose_us = timings
3061            .dwt_decompose_us
3062            .saturating_add(decompose_start.elapsed().as_micros());
3063        return Ok(wavelets);
3064    }
3065
3066    group
3067        .iter()
3068        .map(|&component_index| {
3069            integer_direct_wavelet_from_component(
3070                &components[component_index],
3071                decomposition_levels,
3072                scratch,
3073                accelerator,
3074                timings,
3075            )
3076        })
3077        .collect()
3078}
3079
3080fn same_geometry_component_groups(components: &[JpegDctComponent]) -> Vec<Vec<usize>> {
3081    let mut assigned = vec![false; components.len()];
3082    let mut groups = Vec::new();
3083
3084    for component_index in 0..components.len() {
3085        if assigned[component_index] {
3086            continue;
3087        }
3088        assigned[component_index] = true;
3089        let mut group = vec![component_index];
3090        for candidate_index in component_index + 1..components.len() {
3091            if !assigned[candidate_index]
3092                && same_component_geometry(
3093                    &components[component_index],
3094                    &components[candidate_index],
3095                )
3096            {
3097                assigned[candidate_index] = true;
3098                group.push(candidate_index);
3099            }
3100        }
3101        groups.push(group);
3102    }
3103
3104    groups
3105}
3106
3107fn same_component_geometry(left: &JpegDctComponent, right: &JpegDctComponent) -> bool {
3108    left.width == right.width
3109        && left.height == right.height
3110        && left.block_cols == right.block_cols
3111        && left.block_rows == right.block_rows
3112}
3113
3114fn integer_dct_job_for_component(
3115    component: &JpegDctComponent,
3116) -> Result<DctGridToReversibleDwt53Job<'_>, JpegToHtj2kError> {
3117    validate_component_block_grid(component)?;
3118    Ok(DctGridToReversibleDwt53Job {
3119        dequantized_blocks: &component.dequantized_blocks,
3120        block_cols: component.block_cols as usize,
3121        block_rows: component.block_rows as usize,
3122        width: component.width as usize,
3123        height: component.height as usize,
3124    })
3125}
3126
3127#[allow(clippy::too_many_arguments)]
3128fn component_to_precomputed_htj2k(
3129    component: &JpegDctComponent,
3130    x_rsiz: u8,
3131    y_rsiz: u8,
3132    decomposition_levels: u8,
3133    options: &JpegToHtj2kOptions,
3134    scratch: &mut JpegToHtj2kScratch,
3135    accelerator: &mut impl DctToWaveletStageAccelerator,
3136    timings: &mut TranscodeTimingReport,
3137) -> Result<ComponentTranscodeResult, JpegToHtj2kError> {
3138    let (dwt, actual_coefficients) = match options.coefficient_path {
3139        JpegToHtj2kCoefficientPath::IntegerDirect53 => {
3140            let wavelet = integer_direct_wavelet_from_component(
3141                component,
3142                decomposition_levels,
3143                scratch,
3144                accelerator,
3145                timings,
3146            )?;
3147            (
3148                PrecomputedComponent::Dwt53(PrecomputedHtj2k53Component {
3149                    x_rsiz,
3150                    y_rsiz,
3151                    dwt: j2k_dwt_from_integer_wavelet(&wavelet),
3152                }),
3153                flatten_integer_wavelet(&wavelet),
3154            )
3155        }
3156        JpegToHtj2kCoefficientPath::FloatDirectLinear53 => {
3157            let wavelet = float_direct_wavelet_from_component(
3158                component,
3159                decomposition_levels,
3160                scratch,
3161                accelerator,
3162                timings,
3163            )?;
3164            (
3165                PrecomputedComponent::Dwt53(PrecomputedHtj2k53Component {
3166                    x_rsiz,
3167                    y_rsiz,
3168                    dwt: j2k_dwt_from_wavelet(
3169                        &wavelet,
3170                        component.width as usize,
3171                        component.height as usize,
3172                    ),
3173                }),
3174                rounded_wavelet_i32(&wavelet)?,
3175            )
3176        }
3177        JpegToHtj2kCoefficientPath::FloatDirectLinear97 => {
3178            let wavelet = float_direct_97_wavelet_from_component(
3179                component,
3180                decomposition_levels,
3181                scratch,
3182                accelerator,
3183                timings,
3184            )?;
3185            (
3186                PrecomputedComponent::Dwt97(PrecomputedHtj2k97Component {
3187                    x_rsiz,
3188                    y_rsiz,
3189                    dwt: j2k_dwt97_from_wavelet(
3190                        &wavelet,
3191                        component.width as usize,
3192                        component.height as usize,
3193                    ),
3194                }),
3195                rounded_wavelet97_i32(&wavelet)?,
3196            )
3197        }
3198    };
3199    let float_validation_coefficients = if options.validate_against_float_reference {
3200        let expected = match options.coefficient_path {
3201            JpegToHtj2kCoefficientPath::FloatDirectLinear97 => {
3202                float97_reference_coefficients(component, decomposition_levels, scratch)?
3203            }
3204            JpegToHtj2kCoefficientPath::IntegerDirect53
3205            | JpegToHtj2kCoefficientPath::FloatDirectLinear53 => {
3206                float_reference_coefficients(component, decomposition_levels, scratch)?
3207            }
3208        };
3209        Some((actual_coefficients.clone(), expected))
3210    } else {
3211        None
3212    };
3213    let integer_validation_coefficients = if options.validate_against_integer_reference {
3214        let expected = integer_reference_coefficients(component, decomposition_levels)?;
3215        Some((actual_coefficients, expected))
3216    } else {
3217        None
3218    };
3219
3220    Ok(ComponentTranscodeResult {
3221        precomputed: dwt,
3222        float_validation_coefficients,
3223        integer_validation_coefficients,
3224    })
3225}
3226
3227fn transcode_path_name(
3228    all_unit_sampled: bool,
3229    coefficient_path: JpegToHtj2kCoefficientPath,
3230) -> &'static str {
3231    match (all_unit_sampled, coefficient_path) {
3232        (true, JpegToHtj2kCoefficientPath::IntegerDirect53) => {
3233            "full_resolution_components_integer_direct_53"
3234        }
3235        (false, JpegToHtj2kCoefficientPath::IntegerDirect53) => {
3236            "native_component_sampling_integer_direct_53"
3237        }
3238        (true, JpegToHtj2kCoefficientPath::FloatDirectLinear53) => {
3239            "full_resolution_components_float_direct_53"
3240        }
3241        (false, JpegToHtj2kCoefficientPath::FloatDirectLinear53) => {
3242            "native_component_sampling_float_direct_53"
3243        }
3244        (true, JpegToHtj2kCoefficientPath::FloatDirectLinear97) => {
3245            "full_resolution_components_float_direct_97"
3246        }
3247        (false, JpegToHtj2kCoefficientPath::FloatDirectLinear97) => {
3248            "native_component_sampling_float_direct_97"
3249        }
3250    }
3251}
3252
3253fn float_direct_wavelet_from_component(
3254    component: &JpegDctComponent,
3255    decomposition_levels: u8,
3256    scratch: &mut JpegToHtj2kScratch,
3257    accelerator: &mut impl DctToWaveletStageAccelerator,
3258    timings: &mut TranscodeTimingReport,
3259) -> Result<ComponentWavelet, JpegToHtj2kError> {
3260    timings.component_count = timings.component_count.saturating_add(1);
3261    let repack_start = Instant::now();
3262    dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3263    timings.jpeg_dct_repack_us = timings
3264        .jpeg_dct_repack_us
3265        .saturating_add(repack_start.elapsed().as_micros());
3266    let blocks = &scratch.dct_blocks_f64;
3267    let job = DctGridToDwt53Job {
3268        blocks,
3269        block_cols: component.block_cols as usize,
3270        block_rows: component.block_rows as usize,
3271        width: component.width as usize,
3272        height: component.height as usize,
3273    };
3274    record_accelerator_attempt(timings, 1);
3275    let accelerator_start = Instant::now();
3276    let accelerated = accelerator
3277        .dct_grid_to_dwt53(job)
3278        .map_err(JpegToHtj2kError::Accelerator)?;
3279    timings.dct_to_wavelet_accelerator_us = timings
3280        .dct_to_wavelet_accelerator_us
3281        .saturating_add(accelerator_start.elapsed().as_micros());
3282    let bands = if let Some(bands) = accelerated {
3283        record_accelerator_dispatch(timings, 1);
3284        bands
3285    } else {
3286        record_cpu_fallback(timings, 1);
3287        let fallback_start = Instant::now();
3288        let bands = dct8x8_blocks_to_dwt53_float_linear_with_scratch(
3289            blocks,
3290            component.block_cols as usize,
3291            component.block_rows as usize,
3292            component.width as usize,
3293            component.height as usize,
3294            &mut scratch.dct53_grid,
3295        )
3296        .map_err(dct53_grid_error)?;
3297        timings.dct_to_wavelet_cpu_fallback_us = timings
3298            .dct_to_wavelet_cpu_fallback_us
3299            .saturating_add(fallback_start.elapsed().as_micros());
3300        bands
3301    };
3302    let decompose_start = Instant::now();
3303    let wavelet = decompose_from_first_level(bands, usize::from(decomposition_levels));
3304    timings.dwt_decompose_us = timings
3305        .dwt_decompose_us
3306        .saturating_add(decompose_start.elapsed().as_micros());
3307    Ok(wavelet)
3308}
3309
3310fn float_direct_97_wavelet_from_component(
3311    component: &JpegDctComponent,
3312    decomposition_levels: u8,
3313    scratch: &mut JpegToHtj2kScratch,
3314    accelerator: &mut impl DctToWaveletStageAccelerator,
3315    timings: &mut TranscodeTimingReport,
3316) -> Result<ComponentWavelet97, JpegToHtj2kError> {
3317    timings.component_count = timings.component_count.saturating_add(1);
3318    let repack_start = Instant::now();
3319    dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3320    timings.jpeg_dct_repack_us = timings
3321        .jpeg_dct_repack_us
3322        .saturating_add(repack_start.elapsed().as_micros());
3323    let blocks = &scratch.dct_blocks_f64;
3324    let job = DctGridToDwt97Job {
3325        blocks,
3326        block_cols: component.block_cols as usize,
3327        block_rows: component.block_rows as usize,
3328        width: component.width as usize,
3329        height: component.height as usize,
3330    };
3331    record_accelerator_attempt(timings, 1);
3332    let accelerator_start = Instant::now();
3333    let accelerated = accelerator
3334        .dct_grid_to_dwt97(job)
3335        .map_err(JpegToHtj2kError::Accelerator)?;
3336    timings.dct_to_wavelet_accelerator_us = timings
3337        .dct_to_wavelet_accelerator_us
3338        .saturating_add(accelerator_start.elapsed().as_micros());
3339    let bands = if let Some(bands) = accelerated {
3340        record_accelerator_dispatch(timings, 1);
3341        bands
3342    } else {
3343        record_cpu_fallback(timings, 1);
3344        let fallback_start = Instant::now();
3345        let bands = dct8x8_blocks_then_dwt97_float_with_scratch(
3346            blocks,
3347            component.block_cols as usize,
3348            component.block_rows as usize,
3349            component.width as usize,
3350            component.height as usize,
3351            &mut scratch.dct97_grid,
3352        )
3353        .map_err(dct97_grid_error)?;
3354        timings.dct_to_wavelet_cpu_fallback_us = timings
3355            .dct_to_wavelet_cpu_fallback_us
3356            .saturating_add(fallback_start.elapsed().as_micros());
3357        bands
3358    };
3359    let decompose_start = Instant::now();
3360    let wavelet = decompose_97_from_first_level_with_scratch(
3361        bands,
3362        usize::from(decomposition_levels),
3363        &mut scratch.dct97_grid,
3364    );
3365    timings.dwt_decompose_us = timings
3366        .dwt_decompose_us
3367        .saturating_add(decompose_start.elapsed().as_micros());
3368    Ok(wavelet)
3369}
3370
3371fn float_reference_coefficients(
3372    component: &JpegDctComponent,
3373    decomposition_levels: u8,
3374    scratch: &mut JpegToHtj2kScratch,
3375) -> Result<Vec<i32>, JpegToHtj2kError> {
3376    dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3377    let blocks = &scratch.dct_blocks_f64;
3378    let first_reference_level = dct8x8_blocks_then_dwt53_float(
3379        blocks,
3380        component.block_cols as usize,
3381        component.block_rows as usize,
3382        component.width as usize,
3383        component.height as usize,
3384    )
3385    .map_err(dct53_grid_error)?;
3386    let reference =
3387        decompose_from_first_level(first_reference_level, usize::from(decomposition_levels));
3388    rounded_wavelet_i32(&reference)
3389}
3390
3391fn float97_reference_coefficients(
3392    component: &JpegDctComponent,
3393    decomposition_levels: u8,
3394    scratch: &mut JpegToHtj2kScratch,
3395) -> Result<Vec<i32>, JpegToHtj2kError> {
3396    dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3397    let blocks = &scratch.dct_blocks_f64;
3398    let first_reference_level = dct8x8_blocks_then_dwt97_float(
3399        blocks,
3400        component.block_cols as usize,
3401        component.block_rows as usize,
3402        component.width as usize,
3403        component.height as usize,
3404    )
3405    .map_err(dct97_grid_error)?;
3406    let reference =
3407        decompose_97_from_first_level(first_reference_level, usize::from(decomposition_levels));
3408    rounded_wavelet97_i32(&reference)
3409}
3410
3411fn decompose_from_first_level(
3412    first_level: Dwt53TwoDimensional<f64>,
3413    decomposition_levels: usize,
3414) -> ComponentWavelet {
3415    let mut wavelet = ComponentWavelet {
3416        final_ll: first_level.ll.clone(),
3417        final_ll_width: first_level.low_width,
3418        final_ll_height: first_level.low_height,
3419        levels: vec![first_level],
3420    };
3421
3422    while wavelet.levels.len() < decomposition_levels {
3423        let next = linearized_53_2d_from_plane(
3424            &wavelet.final_ll,
3425            wavelet.final_ll_width,
3426            wavelet.final_ll_height,
3427        );
3428        wavelet.final_ll.clone_from(&next.ll);
3429        wavelet.final_ll_width = next.low_width;
3430        wavelet.final_ll_height = next.low_height;
3431        wavelet.levels.push(next);
3432    }
3433
3434    wavelet
3435}
3436
3437fn decompose_97_from_first_level(
3438    first_level: Dwt97TwoDimensional<f64>,
3439    decomposition_levels: usize,
3440) -> ComponentWavelet97 {
3441    let mut scratch = Dct97GridScratch::default();
3442    decompose_97_from_first_level_with_scratch(first_level, decomposition_levels, &mut scratch)
3443}
3444
3445fn decompose_97_from_first_level_with_scratch(
3446    first_level: Dwt97TwoDimensional<f64>,
3447    decomposition_levels: usize,
3448    scratch: &mut Dct97GridScratch,
3449) -> ComponentWavelet97 {
3450    let mut wavelet = ComponentWavelet97 {
3451        final_ll: first_level.ll.clone(),
3452        final_ll_width: first_level.low_width,
3453        final_ll_height: first_level.low_height,
3454        levels: vec![first_level],
3455    };
3456
3457    while wavelet.levels.len() < decomposition_levels {
3458        let next = linearized_97_2d_from_plane_with_scratch(
3459            &wavelet.final_ll,
3460            wavelet.final_ll_width,
3461            wavelet.final_ll_height,
3462            scratch,
3463        );
3464        wavelet.final_ll.clone_from(&next.ll);
3465        wavelet.final_ll_width = next.low_width;
3466        wavelet.final_ll_height = next.low_height;
3467        wavelet.levels.push(next);
3468    }
3469
3470    wavelet
3471}
3472
3473fn j2k_dwt_from_wavelet(
3474    wavelet: &ComponentWavelet,
3475    width: usize,
3476    height: usize,
3477) -> J2kForwardDwt53Output {
3478    let mut current_width = width;
3479    let mut current_height = height;
3480    let mut levels = Vec::with_capacity(wavelet.levels.len());
3481
3482    for level in &wavelet.levels {
3483        levels.push(J2kForwardDwt53Level {
3484            hl: level.hl.iter().map(|&value| value as f32).collect(),
3485            lh: level.lh.iter().map(|&value| value as f32).collect(),
3486            hh: level.hh.iter().map(|&value| value as f32).collect(),
3487            width: current_width as u32,
3488            height: current_height as u32,
3489            low_width: level.low_width as u32,
3490            low_height: level.low_height as u32,
3491            high_width: level.high_width as u32,
3492            high_height: level.high_height as u32,
3493        });
3494        current_width = level.low_width;
3495        current_height = level.low_height;
3496    }
3497    levels.reverse();
3498
3499    J2kForwardDwt53Output {
3500        ll: wavelet.final_ll.iter().map(|&value| value as f32).collect(),
3501        ll_width: wavelet.final_ll_width as u32,
3502        ll_height: wavelet.final_ll_height as u32,
3503        levels,
3504    }
3505}
3506
3507fn j2k_dwt97_from_wavelet(
3508    wavelet: &ComponentWavelet97,
3509    width: usize,
3510    height: usize,
3511) -> J2kForwardDwt97Output {
3512    let mut current_width = width;
3513    let mut current_height = height;
3514    let mut levels = Vec::with_capacity(wavelet.levels.len());
3515
3516    for level in &wavelet.levels {
3517        levels.push(J2kForwardDwt97Level {
3518            hl: level.hl.iter().map(|&value| value as f32).collect(),
3519            lh: level.lh.iter().map(|&value| value as f32).collect(),
3520            hh: level.hh.iter().map(|&value| value as f32).collect(),
3521            width: current_width as u32,
3522            height: current_height as u32,
3523            low_width: level.low_width as u32,
3524            low_height: level.low_height as u32,
3525            high_width: level.high_width as u32,
3526            high_height: level.high_height as u32,
3527        });
3528        current_width = level.low_width;
3529        current_height = level.low_height;
3530    }
3531    levels.reverse();
3532
3533    J2kForwardDwt97Output {
3534        ll: wavelet.final_ll.iter().map(|&value| value as f32).collect(),
3535        ll_width: wavelet.final_ll_width as u32,
3536        ll_height: wavelet.final_ll_height as u32,
3537        levels,
3538    }
3539}
3540
3541fn j2k_dwt_from_integer_wavelet(wavelet: &IntegerWavelet) -> J2kForwardDwt53Output {
3542    let mut levels = Vec::with_capacity(wavelet.levels.len());
3543    for level in &wavelet.levels {
3544        levels.push(J2kForwardDwt53Level {
3545            hl: level.hl.iter().map(|&value| value as f32).collect(),
3546            lh: level.lh.iter().map(|&value| value as f32).collect(),
3547            hh: level.hh.iter().map(|&value| value as f32).collect(),
3548            width: level.width as u32,
3549            height: level.height as u32,
3550            low_width: level.low_width as u32,
3551            low_height: level.low_height as u32,
3552            high_width: level.high_width as u32,
3553            high_height: level.high_height as u32,
3554        });
3555    }
3556    levels.reverse();
3557
3558    J2kForwardDwt53Output {
3559        ll: wavelet.final_ll.iter().map(|&value| value as f32).collect(),
3560        ll_width: wavelet.final_ll_width as u32,
3561        ll_height: wavelet.final_ll_height as u32,
3562        levels,
3563    }
3564}
3565
3566fn rounded_wavelet_i32(wavelet: &ComponentWavelet) -> Result<Vec<i32>, JpegToHtj2kError> {
3567    let coefficient_count = wavelet.final_ll.len()
3568        + wavelet
3569            .levels
3570            .iter()
3571            .map(|level| level.hl.len() + level.lh.len() + level.hh.len())
3572            .sum::<usize>();
3573    let mut output = Vec::with_capacity(coefficient_count);
3574    append_rounded_i32(&wavelet.final_ll, &mut output)?;
3575    for level in wavelet.levels.iter().rev() {
3576        append_rounded_i32(&level.hl, &mut output)?;
3577        append_rounded_i32(&level.lh, &mut output)?;
3578        append_rounded_i32(&level.hh, &mut output)?;
3579    }
3580    Ok(output)
3581}
3582
3583fn rounded_wavelet97_i32(wavelet: &ComponentWavelet97) -> Result<Vec<i32>, JpegToHtj2kError> {
3584    let coefficient_count = wavelet.final_ll.len()
3585        + wavelet
3586            .levels
3587            .iter()
3588            .map(|level| level.hl.len() + level.lh.len() + level.hh.len())
3589            .sum::<usize>();
3590    let mut output = Vec::with_capacity(coefficient_count);
3591    append_rounded_i32(&wavelet.final_ll, &mut output)?;
3592    for level in wavelet.levels.iter().rev() {
3593        append_rounded_i32(&level.hl, &mut output)?;
3594        append_rounded_i32(&level.lh, &mut output)?;
3595        append_rounded_i32(&level.hh, &mut output)?;
3596    }
3597    Ok(output)
3598}
3599
3600fn integer_direct_wavelet_from_component(
3601    component: &JpegDctComponent,
3602    decomposition_levels: u8,
3603    scratch: &mut JpegToHtj2kScratch,
3604    accelerator: &mut impl DctToWaveletStageAccelerator,
3605    timings: &mut TranscodeTimingReport,
3606) -> Result<IntegerWavelet, JpegToHtj2kError> {
3607    let job = integer_dct_job_for_component(component)?;
3608    timings.component_count = timings.component_count.saturating_add(1);
3609    record_accelerator_attempt(timings, 1);
3610    let accelerator_start = Instant::now();
3611    let accelerated_first_level = accelerator
3612        .dct_grid_to_reversible_dwt53(job)
3613        .map_err(JpegToHtj2kError::Accelerator)?;
3614    timings.dct_to_wavelet_accelerator_us = timings
3615        .dct_to_wavelet_accelerator_us
3616        .saturating_add(accelerator_start.elapsed().as_micros());
3617    if let Some(first_level) = accelerated_first_level {
3618        record_accelerator_dispatch(timings, 1);
3619        let decompose_start = Instant::now();
3620        let wavelet = integer_wavelet_from_first_level(first_level, decomposition_levels);
3621        timings.dwt_decompose_us = timings
3622            .dwt_decompose_us
3623            .saturating_add(decompose_start.elapsed().as_micros());
3624        return Ok(wavelet);
3625    }
3626
3627    scratch.integer_idct_blocks.clear();
3628    scratch
3629        .integer_idct_blocks
3630        .resize_with(component.dequantized_blocks.len(), || None);
3631    record_cpu_fallback(timings, 1);
3632    let fallback_start = Instant::now();
3633    let (final_ll, final_ll_width, final_ll_height, first_level) =
3634        integer_direct_first_level_from_component(
3635            component,
3636            &mut scratch.integer_idct_blocks,
3637            &mut scratch.integer_row,
3638        )?;
3639    timings.dct_to_wavelet_cpu_fallback_us = timings
3640        .dct_to_wavelet_cpu_fallback_us
3641        .saturating_add(fallback_start.elapsed().as_micros());
3642    let decompose_start = Instant::now();
3643    let wavelet = integer_wavelet_from_first_parts(
3644        final_ll,
3645        final_ll_width,
3646        final_ll_height,
3647        first_level,
3648        decomposition_levels,
3649    );
3650    timings.dwt_decompose_us = timings
3651        .dwt_decompose_us
3652        .saturating_add(decompose_start.elapsed().as_micros());
3653    Ok(wavelet)
3654}
3655
3656fn integer_wavelet_from_first_level(
3657    first_level: ReversibleDwt53FirstLevel,
3658    decomposition_levels: u8,
3659) -> IntegerWavelet {
3660    let (final_ll, final_ll_width, final_ll_height, first_level) =
3661        integer_wavelet_first_level_from_accelerated(first_level);
3662    integer_wavelet_from_first_parts(
3663        final_ll,
3664        final_ll_width,
3665        final_ll_height,
3666        first_level,
3667        decomposition_levels,
3668    )
3669}
3670
3671fn integer_wavelet_from_first_parts(
3672    mut final_ll: Vec<i32>,
3673    mut final_ll_width: usize,
3674    mut final_ll_height: usize,
3675    first_level: IntegerWaveletLevel,
3676    decomposition_levels: u8,
3677) -> IntegerWavelet {
3678    let mut levels = vec![first_level];
3679
3680    let remaining_levels = usize::from(decomposition_levels.saturating_sub(1));
3681    if remaining_levels > 0 {
3682        let tail =
3683            reversible_dwt53_i32(final_ll, final_ll_width, final_ll_height, remaining_levels);
3684        final_ll = tail.final_ll;
3685        final_ll_width = tail.final_ll_width;
3686        final_ll_height = tail.final_ll_height;
3687        levels.extend(tail.levels);
3688    }
3689
3690    IntegerWavelet {
3691        final_ll,
3692        final_ll_width,
3693        final_ll_height,
3694        levels,
3695    }
3696}
3697
3698fn integer_wavelet_first_level_from_accelerated(
3699    first_level: ReversibleDwt53FirstLevel,
3700) -> (Vec<i32>, usize, usize, IntegerWaveletLevel) {
3701    let level = IntegerWaveletLevel {
3702        width: first_level.low_width + first_level.high_width,
3703        height: first_level.low_height + first_level.high_height,
3704        low_width: first_level.low_width,
3705        low_height: first_level.low_height,
3706        high_width: first_level.high_width,
3707        high_height: first_level.high_height,
3708        hl: first_level.hl,
3709        lh: first_level.lh,
3710        hh: first_level.hh,
3711    };
3712    (
3713        first_level.ll,
3714        first_level.low_width,
3715        first_level.low_height,
3716        level,
3717    )
3718}
3719
3720fn integer_direct_first_level_from_component(
3721    component: &JpegDctComponent,
3722    idct_blocks: &mut [Option<[i32; 64]>],
3723    row: &mut Vec<i32>,
3724) -> Result<(Vec<i32>, usize, usize, IntegerWaveletLevel), JpegToHtj2kError> {
3725    let width = component.width as usize;
3726    let height = component.height as usize;
3727    let low_width = width.div_ceil(2);
3728    let low_height = height.div_ceil(2);
3729    let high_width = width / 2;
3730    let high_height = height / 2;
3731
3732    let mut ll = Vec::with_capacity(low_width * low_height);
3733    let mut hl = Vec::with_capacity(high_width * low_height);
3734    let mut lh = Vec::with_capacity(low_width * high_height);
3735    let mut hh = Vec::with_capacity(high_width * high_height);
3736    row.clear();
3737    if row.capacity() < width {
3738        row.reserve(width - row.capacity());
3739    }
3740
3741    for output_y in 0..low_height {
3742        row.clear();
3743        for x in 0..width {
3744            row.push(vertical_53_i32_at(
3745                component,
3746                idct_blocks,
3747                x,
3748                output_y,
3749                true,
3750            )?);
3751        }
3752        reversible_lift_53_i32(row);
3753        ll.extend(row.iter().step_by(2).copied());
3754        hl.extend(row.iter().skip(1).step_by(2).copied());
3755    }
3756
3757    for output_y in 0..high_height {
3758        row.clear();
3759        for x in 0..width {
3760            row.push(vertical_53_i32_at(
3761                component,
3762                idct_blocks,
3763                x,
3764                output_y,
3765                false,
3766            )?);
3767        }
3768        reversible_lift_53_i32(row);
3769        lh.extend(row.iter().step_by(2).copied());
3770        hh.extend(row.iter().skip(1).step_by(2).copied());
3771    }
3772
3773    let level = IntegerWaveletLevel {
3774        width,
3775        height,
3776        low_width,
3777        low_height,
3778        high_width,
3779        high_height,
3780        hl,
3781        lh,
3782        hh,
3783    };
3784
3785    Ok((ll, low_width, low_height, level))
3786}
3787
3788fn vertical_53_i32_at(
3789    component: &JpegDctComponent,
3790    idct_blocks: &mut [Option<[i32; 64]>],
3791    x: usize,
3792    output_y: usize,
3793    low_pass: bool,
3794) -> Result<i32, JpegToHtj2kError> {
3795    if low_pass {
3796        vertical_low_53_i32_at(component, idct_blocks, x, output_y)
3797    } else {
3798        vertical_high_53_i32_at(component, idct_blocks, x, output_y)
3799    }
3800}
3801
3802fn vertical_low_53_i32_at(
3803    component: &JpegDctComponent,
3804    idct_blocks: &mut [Option<[i32; 64]>],
3805    x: usize,
3806    low_idx: usize,
3807) -> Result<i32, JpegToHtj2kError> {
3808    let height = component.height as usize;
3809    reversible_lift_53_low_at_fallible(height, low_idx, |y| {
3810        component_sample_i32(component, idct_blocks, x, y)
3811    })
3812}
3813
3814fn vertical_high_53_i32_at(
3815    component: &JpegDctComponent,
3816    idct_blocks: &mut [Option<[i32; 64]>],
3817    x: usize,
3818    high_idx: usize,
3819) -> Result<i32, JpegToHtj2kError> {
3820    let height = component.height as usize;
3821    reversible_lift_53_high_at_fallible(height, high_idx, |y| {
3822        component_sample_i32(component, idct_blocks, x, y)
3823    })
3824}
3825
3826fn component_sample_i32(
3827    component: &JpegDctComponent,
3828    idct_blocks: &mut [Option<[i32; 64]>],
3829    x: usize,
3830    y: usize,
3831) -> Result<i32, JpegToHtj2kError> {
3832    if x >= component.width as usize || y >= component.height as usize {
3833        return Err(JpegToHtj2kError::Validation(
3834            "component sample coordinate exceeds dimensions",
3835        ));
3836    }
3837    let block_cols = component.block_cols as usize;
3838    let block_x = x / 8;
3839    let block_y = y / 8;
3840    let block_idx = block_y * block_cols + block_x;
3841    let block = component
3842        .dequantized_blocks
3843        .get(block_idx)
3844        .ok_or(JpegToHtj2kError::Validation(
3845            "component block grid does not cover requested sample",
3846        ))?;
3847    let cached = idct_blocks
3848        .get_mut(block_idx)
3849        .ok_or(JpegToHtj2kError::Validation(
3850            "integer IDCT cache does not cover requested block",
3851        ))?;
3852    let block_samples = cached.get_or_insert_with(|| {
3853        let decoded = idct_islow_block(block);
3854        decoded.map(|sample| i32::from(sample) - 128)
3855    });
3856    let local_idx = (y % 8) * 8 + (x % 8);
3857    Ok(block_samples[local_idx])
3858}
3859
3860fn integer_reference_coefficients(
3861    component: &JpegDctComponent,
3862    decomposition_levels: u8,
3863) -> Result<Vec<i32>, JpegToHtj2kError> {
3864    let samples = idct_component_samples_i32(component)?;
3865    let wavelet = reversible_dwt53_i32(
3866        samples,
3867        component.width as usize,
3868        component.height as usize,
3869        usize::from(decomposition_levels),
3870    );
3871    Ok(flatten_integer_wavelet(&wavelet))
3872}
3873
3874fn idct_component_samples_i32(component: &JpegDctComponent) -> Result<Vec<i32>, JpegToHtj2kError> {
3875    validate_component_block_grid(component)?;
3876
3877    let width = component.width as usize;
3878    let height = component.height as usize;
3879    let block_cols = component.block_cols as usize;
3880    let block_rows = component.block_rows as usize;
3881    let mut samples = vec![0; width * height];
3882    for block_y in 0..block_rows {
3883        for block_x in 0..block_cols {
3884            let block = &component.dequantized_blocks[block_y * block_cols + block_x];
3885            let block_samples = idct_islow_block(block);
3886            for local_y in 0..8 {
3887                let y = block_y * 8 + local_y;
3888                if y >= height {
3889                    continue;
3890                }
3891                for local_x in 0..8 {
3892                    let x = block_x * 8 + local_x;
3893                    if x >= width {
3894                        continue;
3895                    }
3896                    samples[y * width + x] = i32::from(block_samples[local_y * 8 + local_x]) - 128;
3897                }
3898            }
3899        }
3900    }
3901
3902    Ok(samples)
3903}
3904
3905fn validate_component_block_grid(component: &JpegDctComponent) -> Result<(), JpegToHtj2kError> {
3906    let block_cols = component.block_cols as usize;
3907    let block_rows = component.block_rows as usize;
3908    let expected_blocks =
3909        block_cols
3910            .checked_mul(block_rows)
3911            .ok_or(JpegToHtj2kError::Validation(
3912                "component block grid overflow",
3913            ))?;
3914    if component.dequantized_blocks.len() != expected_blocks {
3915        return Err(JpegToHtj2kError::Validation(
3916            "component block count does not match block grid",
3917        ));
3918    }
3919
3920    Ok(())
3921}
3922
3923fn reversible_dwt53_i32(
3924    mut buffer: Vec<i32>,
3925    width: usize,
3926    height: usize,
3927    decomposition_levels: usize,
3928) -> IntegerWavelet {
3929    let mut current_width = width;
3930    let mut current_height = height;
3931    let mut levels = Vec::with_capacity(decomposition_levels);
3932
3933    for _ in 0..decomposition_levels {
3934        for x in 0..current_width {
3935            let mut column = Vec::with_capacity(current_height);
3936            for y in 0..current_height {
3937                column.push(buffer[y * width + x]);
3938            }
3939            reversible_lift_53_i32(&mut column);
3940            let low_len = current_height.div_ceil(2);
3941            for (idx, value) in column.iter().step_by(2).copied().enumerate() {
3942                buffer[idx * width + x] = value;
3943            }
3944            for (idx, value) in column.iter().skip(1).step_by(2).copied().enumerate() {
3945                buffer[(low_len + idx) * width + x] = value;
3946            }
3947        }
3948
3949        for y in 0..current_height {
3950            let row_start = y * width;
3951            let mut row = buffer[row_start..row_start + current_width].to_vec();
3952            reversible_lift_53_i32(&mut row);
3953            let low_len = current_width.div_ceil(2);
3954            for (idx, value) in row.iter().step_by(2).copied().enumerate() {
3955                buffer[row_start + idx] = value;
3956            }
3957            for (idx, value) in row.iter().skip(1).step_by(2).copied().enumerate() {
3958                buffer[row_start + low_len + idx] = value;
3959            }
3960        }
3961
3962        let low_width = current_width.div_ceil(2);
3963        let low_height = current_height.div_ceil(2);
3964        let high_width = current_width / 2;
3965        let high_height = current_height / 2;
3966        let mut hl = Vec::with_capacity(high_width * low_height);
3967        let mut lh = Vec::with_capacity(low_width * high_height);
3968        let mut hh = Vec::with_capacity(high_width * high_height);
3969
3970        for y in 0..low_height {
3971            for x in 0..high_width {
3972                hl.push(buffer[y * width + low_width + x]);
3973            }
3974        }
3975        for y in 0..high_height {
3976            for x in 0..low_width {
3977                lh.push(buffer[(low_height + y) * width + x]);
3978            }
3979        }
3980        for y in 0..high_height {
3981            for x in 0..high_width {
3982                hh.push(buffer[(low_height + y) * width + low_width + x]);
3983            }
3984        }
3985
3986        levels.push(IntegerWaveletLevel {
3987            width: current_width,
3988            height: current_height,
3989            low_width,
3990            low_height,
3991            high_width,
3992            high_height,
3993            hl,
3994            lh,
3995            hh,
3996        });
3997        current_width = low_width;
3998        current_height = low_height;
3999    }
4000
4001    let mut final_ll = Vec::with_capacity(current_width * current_height);
4002    for y in 0..current_height {
4003        for x in 0..current_width {
4004            final_ll.push(buffer[y * width + x]);
4005        }
4006    }
4007
4008    IntegerWavelet {
4009        final_ll,
4010        final_ll_width: current_width,
4011        final_ll_height: current_height,
4012        levels,
4013    }
4014}
4015
4016fn flatten_integer_wavelet(wavelet: &IntegerWavelet) -> Vec<i32> {
4017    let coefficient_count = wavelet.final_ll.len()
4018        + wavelet
4019            .levels
4020            .iter()
4021            .map(|level| level.hl.len() + level.lh.len() + level.hh.len())
4022            .sum::<usize>();
4023    let mut output = Vec::with_capacity(coefficient_count);
4024    output.extend_from_slice(&wavelet.final_ll);
4025    for level in wavelet.levels.iter().rev() {
4026        output.extend_from_slice(&level.hl);
4027        output.extend_from_slice(&level.lh);
4028        output.extend_from_slice(&level.hh);
4029    }
4030    output
4031}
4032
4033fn append_rounded_i32(values: &[f64], output: &mut Vec<i32>) -> Result<(), JpegToHtj2kError> {
4034    for &value in values {
4035        output.push(round_f64_to_i32(value)?);
4036    }
4037    Ok(())
4038}
4039
4040fn round_f64_to_i32(value: f64) -> Result<i32, JpegToHtj2kError> {
4041    let rounded = value.round();
4042    if !rounded.is_finite() {
4043        return Err(JpegToHtj2kError::Validation(
4044            "float reference coefficient is not finite",
4045        ));
4046    }
4047    if rounded < f64::from(i32::MIN) || rounded > f64::from(i32::MAX) {
4048        return Err(JpegToHtj2kError::Validation(
4049            "float reference coefficient exceeds i32 range",
4050        ));
4051    }
4052    Ok(rounded as i32)
4053}
4054
4055fn decomposition_levels_for_components(
4056    components: &[JpegDctComponent],
4057    requested_levels: u8,
4058) -> Result<u8, JpegToHtj2kError> {
4059    if requested_levels == 0 {
4060        return Err(JpegToHtj2kError::Unsupported(
4061            "jpeg_to_htj2k requires at least one decomposition level",
4062        ));
4063    }
4064
4065    let available_levels = components
4066        .iter()
4067        .map(|component| available_decomposition_levels(component.width, component.height))
4068        .min()
4069        .ok_or(JpegToHtj2kError::Unsupported("missing JPEG components"))?;
4070    let decomposition_levels = requested_levels.min(available_levels);
4071    if decomposition_levels == 0 {
4072        return Err(JpegToHtj2kError::Unsupported(
4073            "component dimensions are too small for a DWT decomposition",
4074        ));
4075    }
4076
4077    Ok(decomposition_levels)
4078}
4079
/// Returns the base-2 logarithm (rounded down) of the smaller dimension, or
/// zero when that dimension is 0 or 1.
fn available_decomposition_levels(width: u32, height: u32) -> u8 {
    match width.min(height) {
        0 | 1 => 0,
        // `ilog2` of a `u32` is at most 31, so the cast cannot truncate.
        smaller => smaller.ilog2() as u8,
    }
}
4088
4089fn component_sampling_for_jpeg(
4090    components: &[JpegDctComponent],
4091    reference_width: u32,
4092    reference_height: u32,
4093) -> Result<Vec<(u8, u8)>, JpegToHtj2kError> {
4094    let max_h = components
4095        .iter()
4096        .map(|component| component.h_samp)
4097        .max()
4098        .ok_or(JpegToHtj2kError::Unsupported("missing JPEG components"))?;
4099    let max_v = components
4100        .iter()
4101        .map(|component| component.v_samp)
4102        .max()
4103        .ok_or(JpegToHtj2kError::Unsupported("missing JPEG components"))?;
4104
4105    components
4106        .iter()
4107        .map(|component| {
4108            if component.h_samp == 0 || component.v_samp == 0 {
4109                return Err(JpegToHtj2kError::Unsupported(
4110                    "JPEG component sampling factors must be non-zero",
4111                ));
4112            }
4113            if max_h % component.h_samp != 0 || max_v % component.v_samp != 0 {
4114                return Err(JpegToHtj2kError::Unsupported(
4115                    "fractional JPEG component sampling is not supported",
4116                ));
4117            }
4118
4119            let x_rsiz = max_h / component.h_samp;
4120            let y_rsiz = max_v / component.v_samp;
4121            let expected_width = reference_width.div_ceil(u32::from(x_rsiz));
4122            let expected_height = reference_height.div_ceil(u32::from(y_rsiz));
4123            if component.width != expected_width || component.height != expected_height {
4124                return Err(JpegToHtj2kError::Unsupported(
4125                    "JPEG component dimensions do not match derived SIZ sampling",
4126                ));
4127            }
4128
4129            Ok((x_rsiz, y_rsiz))
4130        })
4131        .collect()
4132}
4133
/// Replaces the contents of `output` with `blocks` converted to 8x8 `f64`
/// grids.
///
/// `output` is cleared first so its allocation can be reused. Coefficient
/// `idx` of a block lands at row `idx / 8`, column `idx % 8`.
fn dct_blocks_to_8x8_f64_into(blocks: &[[i16; 64]], output: &mut Vec<[[f64; 8]; 8]>) {
    output.clear();
    output.reserve(blocks.len());
    output.extend(blocks.iter().map(|block| {
        let mut grid = [[0.0_f64; 8]; 8];
        for (row_idx, row) in grid.iter_mut().enumerate() {
            for (col_idx, cell) in row.iter_mut().enumerate() {
                *cell = f64::from(block[row_idx * 8 + col_idx]);
            }
        }
        grid
    }));
}
4145
4146fn dct_blocks_to_8x8_f64(blocks: &[[i16; 64]]) -> Vec<[[f64; 8]; 8]> {
4147    let mut output = Vec::with_capacity(blocks.len());
4148    dct_blocks_to_8x8_f64_into(blocks, &mut output);
4149    output
4150}
4151
#[cfg(test)]
mod tests {
    use super::*;
    use crate::accelerator::{
        DctGridI16ToHtj2k97CodeBlockBatch, PreencodedHtj2k97CodeBlock,
        PreencodedHtj2k97CompactCodeBlock, PreencodedHtj2k97CompactComponent,
        PreencodedHtj2k97CompactResolution, PreencodedHtj2k97CompactSubband,
        PreencodedHtj2k97Resolution, PreencodedHtj2k97Subband,
    };
    use j2k::adapter::encode_stage::{EncodedHtJ2kCodeBlock, J2kHtCodeBlockEncodeJob};
    use j2k_jpeg::transcode::JpegDctCodingMode;
    use j2k_jpeg::ColorSpace;

    /// Adding two reports must clamp at the numeric maximum instead of
    /// wrapping, and must sum the ordinary counters.
    #[test]
    fn timing_report_add_assign_saturates_and_adds_all_counter_kinds() {
        let mut total = TranscodeTimingReport {
            source_raw_probe_us: u128::MAX - 1,
            dwt97_batch_ht_codeblock_dispatches: usize::MAX - 1,
            tile_count: 2,
            accelerator_jobs: 3,
            cpu_fallback_jobs: 4,
            ..TranscodeTimingReport::default()
        };
        let increment = TranscodeTimingReport {
            source_raw_probe_us: 10,
            dwt97_batch_ht_codeblock_dispatches: 10,
            tile_count: 5,
            accelerator_jobs: 7,
            cpu_fallback_jobs: 11,
            ..TranscodeTimingReport::default()
        };

        total.add_assign(increment);

        assert_eq!(total.source_raw_probe_us, u128::MAX);
        assert_eq!(total.dwt97_batch_ht_codeblock_dispatches, usize::MAX);
        assert_eq!(total.tile_count, 7);
        assert_eq!(total.accelerator_jobs, 10);
        assert_eq!(total.cpu_fallback_jobs, 15);
    }

    /// Test double that records how often the single-batch and the grouped
    /// i16 pre-encoded entry points are called, plus the job count of every
    /// group it is offered.
    #[derive(Default)]
    struct GroupedI16Accelerator {
        grouped_calls: usize,
        single_calls: usize,
        grouped_lengths: Vec<Vec<usize>>,
    }

    impl DctToWaveletStageAccelerator for GroupedI16Accelerator {
        fn supports_htj2k97_i16_preencoded_batch(&self) -> bool {
            true
        }

        fn dct_grid_i16_to_htj2k97_preencoded_batch(
            &mut self,
            jobs: &[DctGridI16ToHtj2k97CodeBlockJob<'_>],
            _options: Htj2k97CodeBlockOptions,
        ) -> Result<Option<Vec<PreencodedHtj2k97Component>>, TranscodeStageError> {
            self.single_calls = self.single_calls.saturating_add(1);
            let components = jobs
                .iter()
                .map(|job| dummy_preencoded_component(job.x_rsiz, job.y_rsiz))
                .collect();
            Ok(Some(components))
        }

        fn dct_grid_i16_to_htj2k97_preencoded_batch_groups(
            &mut self,
            groups: &[DctGridI16ToHtj2k97CodeBlockBatch<'_, '_>],
            _options: Htj2k97CodeBlockOptions,
        ) -> Result<Option<Vec<Vec<PreencodedHtj2k97Component>>>, TranscodeStageError> {
            self.grouped_calls = self.grouped_calls.saturating_add(1);
            let lengths = groups.iter().map(|group| group.jobs.len()).collect();
            self.grouped_lengths.push(lengths);

            let per_group = groups
                .iter()
                .map(|group| {
                    group
                        .jobs
                        .iter()
                        .map(|job| dummy_preencoded_component(job.x_rsiz, job.y_rsiz))
                        .collect()
                })
                .collect();
            Ok(Some(per_group))
        }
    }

    /// A tile with one 16x16 and two 8x8 components must reach the accelerator
    /// through a single grouped call holding groups of 1 and 2 jobs.
    #[test]
    fn float97_batch_offers_i16_preencoded_geometry_groups_together() {
        let options = JpegToHtj2kOptions::lossy_97();
        let mut accelerator = GroupedI16Accelerator::default();
        let mut scratch = JpegToHtj2kScratch::default();
        let mut timings = TranscodeTimingReport::default();
        let mut tiles = vec![test_float97_tile()];

        let (batch_count, job_count) = transform_float97_batch_tiles(
            &mut tiles,
            &options,
            &mut scratch,
            &mut accelerator,
            &mut timings,
        )
        .expect("grouped i16 preencoded transform");

        assert_eq!(batch_count, 2);
        assert_eq!(job_count, 3);
        assert_eq!(accelerator.grouped_calls, 1);
        assert_eq!(accelerator.single_calls, 0);
        assert_eq!(accelerator.grouped_lengths, vec![vec![1, 2]]);
        assert!(tiles[0].preencoded_components.iter().all(Option::is_some));
    }

    /// Test double that counts batch calls, batched jobs and single-block
    /// calls, and answers `Ok(None)` to every request.
    #[derive(Default)]
    struct CountingHtBatchEncodeAccelerator {
        batches: usize,
        jobs: usize,
        single_blocks: usize,
    }

    impl J2kEncodeStageAccelerator for CountingHtBatchEncodeAccelerator {
        fn encode_ht_code_blocks(
            &mut self,
            jobs: &[J2kHtCodeBlockEncodeJob<'_>],
        ) -> Result<Option<Vec<EncodedHtJ2kCodeBlock>>, &'static str> {
            self.batches = self.batches.saturating_add(1);
            self.jobs = self.jobs.saturating_add(jobs.len());
            Ok(None)
        }

        fn encode_ht_code_block(
            &mut self,
            _job: J2kHtCodeBlockEncodeJob<'_>,
        ) -> Result<Option<EncodedHtJ2kCodeBlock>, &'static str> {
            self.single_blocks = self.single_blocks.saturating_add(1);
            Ok(None)
        }
    }

    /// Two precomputed tiles must be offered to the accelerator as one batch,
    /// and every batched job must also be seen by the single-block entry
    /// point.
    #[test]
    fn float97_precomputed_prepared_tiles_offer_all_tiles_to_one_ht_batch() {
        let mut options = JpegToHtj2kOptions::lossy_97();
        options.encode_options.code_block_width_exp = 2;
        options.encode_options.code_block_height_exp = 2;
        let mut accelerator = CountingHtBatchEncodeAccelerator::default();
        let tiles = vec![
            test_float97_precomputed_tile(0),
            test_float97_precomputed_tile(1),
        ];

        let encoded_tiles = encode_float97_prepared_tiles(tiles, &options, &mut accelerator);

        assert_eq!(encoded_tiles.len(), 2);
        for (position, (tile_index, outcome)) in encoded_tiles.into_iter().enumerate() {
            assert_eq!(tile_index, position);
            let encoded = outcome.expect("precomputed batch tile encodes");
            // 0xFF4F is the JPEG 2000 start-of-codestream marker.
            assert!(encoded.codestream.starts_with(&[0xff, 0x4f]));
        }
        assert_eq!(accelerator.batches, 1);
        assert!(accelerator.jobs > 0);
        assert_eq!(accelerator.single_blocks, accelerator.jobs);
    }

    /// Storing a compact component must copy only the referenced payload bytes
    /// into the tile and rewrite the code-block ranges relative to the tile
    /// payload.
    #[test]
    fn compact_preencoded_component_storage_rebases_ranges_into_tile_payload() {
        let batch_payload = vec![1, 2, 3, 4, 5, 6];
        let code_blocks = vec![
            PreencodedHtj2k97CompactCodeBlock {
                width: 1,
                height: 1,
                payload_range: 1..3,
                cleanup_length: 2,
                refinement_length: 0,
                num_coding_passes: 1,
                num_zero_bitplanes: 0,
            },
            PreencodedHtj2k97CompactCodeBlock {
                width: 1,
                height: 1,
                payload_range: 3..6,
                cleanup_length: 3,
                refinement_length: 0,
                num_coding_passes: 1,
                num_zero_bitplanes: 0,
            },
        ];
        let component = PreencodedHtj2k97CompactComponent {
            x_rsiz: 1,
            y_rsiz: 1,
            resolutions: vec![PreencodedHtj2k97CompactResolution {
                subbands: vec![PreencodedHtj2k97CompactSubband {
                    sub_band_type: crate::accelerator::J2kSubBandType::LowLow,
                    num_cbs_x: 2,
                    num_cbs_y: 1,
                    total_bitplanes: 1,
                    code_blocks,
                }],
            }],
        };
        let mut tile = test_float97_tile();

        store_compact_preencoded_component(&mut tile, 1, &batch_payload, component)
            .expect("compact component storage");

        let stored = tile.preencoded_compact_components[1]
            .as_ref()
            .expect("stored compact component");
        assert_eq!(tile.preencoded_compact_payload, vec![2, 3, 4, 5, 6]);
        assert_eq!(
            stored.resolutions[0].subbands[0].code_blocks[0].payload_range,
            0..2
        );
        assert_eq!(
            stored.resolutions[0].subbands[0].code_blocks[1].payload_range,
            2..5
        );
    }

    /// Three-component 16x16 YCbCr tile: one 16x16 component with sampling
    /// factors 2x2 and two 8x8 components with factors 1x1, with no
    /// precomputed, pre-encoded or prequantized data attached.
    fn test_float97_tile() -> Float97BatchTile {
        let jpeg = JpegDctImage {
            width: 16,
            height: 16,
            color_space: ColorSpace::YCbCr,
            coding_mode: JpegDctCodingMode::BaselineSequential,
            scan_count: 1,
            components: vec![
                test_component(0, 16, 16, 2, 2),
                test_component(1, 8, 8, 1, 1),
                test_component(2, 8, 8, 1, 1),
            ],
            restart_index: None,
        };
        Float97BatchTile {
            tile_index: 0,
            jpeg,
            component_sampling: vec![(1, 1), (2, 2), (2, 2)],
            decomposition_levels: 1,
            all_unit_sampled: false,
            component_reports: Vec::new(),
            precomputed_components: vec![None, None, None],
            preencoded_compact_payload: Vec::new(),
            preencoded_compact_components: vec![None, None, None],
            preencoded_components: vec![None, None, None],
            prequantized_components: vec![None, None, None],
            float_validation_actual: Vec::new(),
            float_validation_expected: Vec::new(),
            timings: TranscodeTimingReport::default(),
        }
    }

    /// Single-component 17x13 grayscale tile that already carries a
    /// precomputed one-level DWT for its component.
    fn test_float97_precomputed_tile(tile_index: usize) -> Float97BatchTile {
        let (width, height) = (17, 13);
        let jpeg = JpegDctImage {
            width,
            height,
            color_space: ColorSpace::Grayscale,
            coding_mode: JpegDctCodingMode::BaselineSequential,
            scan_count: 1,
            components: vec![test_component(0, width, height, 1, 1)],
            restart_index: None,
        };
        let report = TranscodeComponentReport {
            component_index: 0,
            width,
            height,
            block_cols: width.div_ceil(8),
            block_rows: height.div_ceil(8),
            x_rsiz: 1,
            y_rsiz: 1,
        };
        Float97BatchTile {
            tile_index,
            jpeg,
            component_sampling: vec![(1, 1)],
            decomposition_levels: 1,
            all_unit_sampled: true,
            component_reports: vec![report],
            precomputed_components: vec![Some(dummy_precomputed_component(1, 1, width, height))],
            preencoded_compact_payload: Vec::new(),
            preencoded_compact_components: vec![None],
            preencoded_components: vec![None],
            prequantized_components: vec![None],
            float_validation_actual: Vec::new(),
            float_validation_expected: Vec::new(),
            timings: TranscodeTimingReport::default(),
        }
    }

    /// Builds a component whose block grid covers `width` x `height` in 8x8
    /// blocks, with an all-ones quantization table and all-zero coefficients.
    fn test_component(
        component_index: usize,
        width: u32,
        height: u32,
        h_samp: u8,
        v_samp: u8,
    ) -> JpegDctComponent {
        let block_cols = width.div_ceil(8);
        let block_rows = height.div_ceil(8);
        let block_count = (block_cols * block_rows) as usize;
        let zero_blocks = vec![[0i16; 64]; block_count];
        JpegDctComponent {
            component_index,
            width,
            height,
            h_samp,
            v_samp,
            block_cols,
            block_rows,
            quant_table: [1u16; 64],
            quantized_blocks: zero_blocks.clone(),
            dequantized_blocks: zero_blocks,
        }
    }

    /// Builds a one-level 9/7 DWT output for a `width` x `height` component,
    /// filled with deterministic synthetic coefficients.
    fn dummy_precomputed_component(
        x_rsiz: u8,
        y_rsiz: u8,
        width: u32,
        height: u32,
    ) -> PrecomputedHtj2k97Component {
        let (low_width, low_height) = (width.div_ceil(2), height.div_ceil(2));
        let (high_width, high_height) = (width / 2, height / 2);
        let level = J2kForwardDwt97Level {
            hl: sample_f32_coefficients(high_width * low_height, -0.75),
            lh: sample_f32_coefficients(low_width * high_height, 1.25),
            hh: sample_f32_coefficients(high_width * high_height, -1.5),
            width,
            height,
            low_width,
            low_height,
            high_width,
            high_height,
        };
        PrecomputedHtj2k97Component {
            x_rsiz,
            y_rsiz,
            dwt: J2kForwardDwt97Output {
                ll: sample_f32_coefficients(low_width * low_height, 0.25),
                ll_width: low_width,
                ll_height: low_height,
                levels: vec![level],
            },
        }
    }

    /// Returns `count` values of the form `seed + sin(index) * 0.125`.
    fn sample_f32_coefficients(count: u32, seed: f32) -> Vec<f32> {
        (0..count)
            .map(|index| seed + (index as f32).sin() * 0.125)
            .collect()
    }

    /// Builds a pre-encoded component holding a single 1x1 LowLow code block
    /// with an empty payload.
    fn dummy_preencoded_component(x_rsiz: u8, y_rsiz: u8) -> PreencodedHtj2k97Component {
        let empty_block = PreencodedHtj2k97CodeBlock {
            width: 1,
            height: 1,
            encoded: EncodedHtJ2kCodeBlock {
                data: Vec::new(),
                cleanup_length: 0,
                refinement_length: 0,
                num_coding_passes: 0,
                num_zero_bitplanes: 1,
            },
        };
        PreencodedHtj2k97Component {
            x_rsiz,
            y_rsiz,
            resolutions: vec![PreencodedHtj2k97Resolution {
                subbands: vec![PreencodedHtj2k97Subband {
                    sub_band_type: crate::accelerator::J2kSubBandType::LowLow,
                    num_cbs_x: 1,
                    num_cbs_y: 1,
                    total_bitplanes: 1,
                    code_blocks: vec![empty_block],
                }],
            }],
        }
    }
}