1use core::fmt;
6use std::time::Instant;
7
8use j2k::adapter::encode_stage::{
9 CpuOnlyJ2kEncodeStageAccelerator, IrreversibleQuantizationSubbandScales,
10 J2kEncodeDispatchReport, J2kEncodeStageAccelerator, J2kForwardDwt53Level,
11 J2kForwardDwt53Output, J2kForwardDwt97Level, J2kForwardDwt97Output, NativeEncodeStageAdapter,
12 PrecomputedHtj2k53Component, PrecomputedHtj2k53Image, PrecomputedHtj2k97Component,
13 PrecomputedHtj2k97Image, PreencodedHtj2k97CompactComponent, PreencodedHtj2k97CompactImage,
14 PreencodedHtj2k97Component, PreencodedHtj2k97Image, PrequantizedHtj2k97Component,
15 PrequantizedHtj2k97Image,
16};
17use j2k::J2kProgressionOrder;
18use j2k_jpeg::transcode::{
19 extract_dct_blocks, idct_islow_block, DctExtractOptions, JpegDctComponent, JpegDctImage,
20};
21use j2k_native::{
22 encode_precomputed_htj2k_53_with_accelerator,
23 encode_precomputed_htj2k_97_batch_with_accelerator,
24 encode_precomputed_htj2k_97_with_accelerator,
25 encode_preencoded_htj2k_97_compact_owned_with_accelerator,
26 encode_preencoded_htj2k_97_owned_with_accelerator,
27 encode_prequantized_htj2k_97_with_accelerator,
28};
29use rayon::prelude::*;
30
31use crate::accelerator::{
32 CpuOnlyDctToWaveletStageAccelerator, DctGridI16ToHtj2k97CodeBlockBatch,
33 DctGridI16ToHtj2k97CodeBlockJob, DctGridToDwt53Job, DctGridToDwt97Job,
34 DctGridToHtj2k97CodeBlockJob, DctGridToReversibleDwt53Job, DctToWaveletStageAccelerator,
35 Dwt97BatchStageTimings, Htj2k97CodeBlockOptions, ReversibleDwt53FirstLevel,
36 TranscodeStageError,
37};
38use crate::dct53_2d::{
39 dct8x8_blocks_then_dwt53_float, dct8x8_blocks_to_dwt53_float_linear_with_scratch,
40 linearized_53_2d_from_plane, Dct53GridScratch, Dwt53TwoDimensional,
41};
42use crate::dct97_2d::{
43 dct8x8_blocks_then_dwt97_float, dct8x8_blocks_then_dwt97_float_with_scratch,
44 linearized_97_2d_from_plane_with_scratch, Dct97GridScratch, Dwt97TwoDimensional,
45};
46use crate::metrics::{error_metrics_i32, ErrorMetrics, MetricsLengthError};
47use crate::reversible53::{
48 reversible_lift_53_high_at_fallible, reversible_lift_53_i32, reversible_lift_53_low_at_fallible,
49};
50use crate::DctGridError;
51
/// Irreversible quantization scale that [`JpegToHtj2kOptions::lossy_97`] assigns to
/// `irreversible_quantization_scale` when building the lossy 9/7 preset.
pub const JPEG_TO_HTJ2K_LOSSY_97_QUANTIZATION_SCALE: f32 = 1.9;
59
/// Encoder settings used by the JPEG-to-HTJ2K transcode.
///
/// `to_native` copies every field into `j2k_native::EncodeOptions`; only
/// `progression_order` is converted on the way (through `native_progression_order`).
/// The defaults quoted on each field are the values set by this type's `Default` impl.
#[derive(Debug, Clone, PartialEq)]
// The boolean flags mirror the native encoder options one-to-one.
#[allow(clippy::struct_excessive_bools)]
pub struct JpegToHtj2kEncodeOptions {
    /// Requested decomposition level count (default 5; the transcode presets use 1).
    pub num_decomposition_levels: u8,
    /// Reversible flag forwarded to the native encoder (default `true`).
    pub reversible: bool,
    /// Code-block width exponent (default 4).
    pub code_block_width_exp: u8,
    /// Code-block height exponent (default 4).
    pub code_block_height_exp: u8,
    /// Guard bit count (default 1).
    pub guard_bits: u8,
    /// Default `false`; the transcode presets switch it on.
    pub use_ht_block_coding: bool,
    /// Progression order (default `Lrcp`).
    pub progression_order: J2kProgressionOrder,
    /// Default `false`.
    pub write_tlm: bool,
    /// Default `false`.
    pub write_plt: bool,
    /// Default `false`.
    pub write_plm: bool,
    /// Default `false`.
    pub write_sop: bool,
    /// Default `false`.
    pub write_eph: bool,
    /// Default `true`; the transcode presets switch it off.
    pub use_mct: bool,
    /// Quality layer count (default 1).
    pub num_layers: u8,
    /// Default empty.
    pub quality_layer_byte_targets: Vec<u64>,
    /// Default `true`; the transcode presets switch it off.
    pub validate_high_throughput_codestream: bool,
    /// Default 1.0; the lossy 9/7 preset overrides it.
    pub irreversible_quantization_scale: f32,
    /// Defaults to `IrreversibleQuantizationSubbandScales::default()`.
    pub irreversible_quantization_subband_scales: IrreversibleQuantizationSubbandScales,
    /// Default `None`. NOTE(review): presumably one `(x, y)` sampling pair per
    /// component — confirm against the native encoder.
    pub component_sampling: Option<Vec<(u8, u8)>>,
    /// Default `None`.
    pub tile_size: Option<(u32, u32)>,
    /// Default empty.
    pub precinct_exponents: Vec<(u8, u8)>,
}
107
108impl Default for JpegToHtj2kEncodeOptions {
109 fn default() -> Self {
110 Self {
111 num_decomposition_levels: 5,
112 reversible: true,
113 code_block_width_exp: 4,
114 code_block_height_exp: 4,
115 guard_bits: 1,
116 use_ht_block_coding: false,
117 progression_order: J2kProgressionOrder::Lrcp,
118 write_tlm: false,
119 write_plt: false,
120 write_plm: false,
121 write_sop: false,
122 write_eph: false,
123 use_mct: true,
124 num_layers: 1,
125 quality_layer_byte_targets: Vec::new(),
126 validate_high_throughput_codestream: true,
127 irreversible_quantization_scale: 1.0,
128 irreversible_quantization_subband_scales:
129 IrreversibleQuantizationSubbandScales::default(),
130 component_sampling: None,
131 tile_size: None,
132 precinct_exponents: Vec::new(),
133 }
134 }
135}
136
137impl JpegToHtj2kEncodeOptions {
138 fn to_native(&self) -> j2k_native::EncodeOptions {
139 j2k_native::EncodeOptions {
140 num_decomposition_levels: self.num_decomposition_levels,
141 reversible: self.reversible,
142 code_block_width_exp: self.code_block_width_exp,
143 code_block_height_exp: self.code_block_height_exp,
144 guard_bits: self.guard_bits,
145 use_ht_block_coding: self.use_ht_block_coding,
146 progression_order: native_progression_order(self.progression_order),
147 write_tlm: self.write_tlm,
148 write_plt: self.write_plt,
149 write_plm: self.write_plm,
150 write_sop: self.write_sop,
151 write_eph: self.write_eph,
152 use_mct: self.use_mct,
153 num_layers: self.num_layers,
154 quality_layer_byte_targets: self.quality_layer_byte_targets.clone(),
155 validate_high_throughput_codestream: self.validate_high_throughput_codestream,
156 irreversible_quantization_scale: self.irreversible_quantization_scale,
157 irreversible_quantization_subband_scales: self.irreversible_quantization_subband_scales,
158 component_sampling: self.component_sampling.clone(),
159 tile_size: self.tile_size,
160 precinct_exponents: self.precinct_exponents.clone(),
161 }
162 }
163}
164
/// Top-level options for a JPEG-to-HTJ2K transcode.
///
/// `Default` is the [`JpegToHtj2kOptions::lossless_53`] preset.
#[derive(Debug, Clone)]
pub struct JpegToHtj2kOptions {
    /// Encoder settings handed to the HTJ2K encode stage.
    pub encode_options: JpegToHtj2kEncodeOptions,
    /// Which coefficient path the transcode uses.
    pub coefficient_path: JpegToHtj2kCoefficientPath,
    /// Both presets leave this `false`.
    /// NOTE(review): presumably enables the float-reference comparison that fills
    /// `TranscodeReport::float_reference_metrics` — confirm against the transcode body.
    pub validate_against_float_reference: bool,
    /// Both presets leave this `false`.
    /// NOTE(review): presumably enables the integer-reference comparison that fills
    /// `TranscodeReport::integer_reference_metrics` — confirm against the transcode body.
    pub validate_against_integer_reference: bool,
}
182
183impl Default for JpegToHtj2kOptions {
184 fn default() -> Self {
185 Self::lossless_53()
186 }
187}
188
189impl JpegToHtj2kOptions {
190 #[must_use]
192 pub fn lossless_53() -> Self {
193 Self {
194 encode_options: transcode_encode_options(true),
195 coefficient_path: JpegToHtj2kCoefficientPath::IntegerDirect53,
196 validate_against_float_reference: false,
197 validate_against_integer_reference: false,
198 }
199 }
200
201 #[must_use]
203 pub fn lossy_97() -> Self {
204 let mut encode_options = transcode_encode_options(false);
205 encode_options.irreversible_quantization_scale = JPEG_TO_HTJ2K_LOSSY_97_QUANTIZATION_SCALE;
206 Self {
207 encode_options,
208 coefficient_path: JpegToHtj2kCoefficientPath::FloatDirectLinear97,
209 validate_against_float_reference: false,
210 validate_against_integer_reference: false,
211 }
212 }
213}
214
215fn transcode_encode_options(reversible: bool) -> JpegToHtj2kEncodeOptions {
216 JpegToHtj2kEncodeOptions {
217 num_decomposition_levels: 1,
218 reversible,
219 use_ht_block_coding: true,
220 use_mct: false,
221 validate_high_throughput_codestream: false,
222 ..JpegToHtj2kEncodeOptions::default()
223 }
224}
225
/// Selects how JPEG DCT coefficients are carried into the wavelet domain.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JpegToHtj2kCoefficientPath {
    /// Path used by the lossless 5/3 preset; the batch entry point routes it
    /// through the integer batch pipeline.
    IntegerDirect53,
    /// The batch entry point always transcodes tiles on this path one at a time.
    FloatDirectLinear53,
    /// Path used by the lossy 9/7 preset; the batch entry point uses the 9/7 batch
    /// pipeline when the transform accelerator reports batch support and otherwise
    /// transcodes tiles one at a time.
    FloatDirectLinear97,
}
244
/// Transcoder that owns its scratch buffers and hands the same buffers to every
/// transcode call made through it.
#[derive(Debug, Default)]
pub struct JpegToHtj2kTranscoder {
    /// Scratch storage passed by mutable reference into each transcode.
    scratch: JpegToHtj2kScratch,
}
254
255impl JpegToHtj2kTranscoder {
256 pub fn transcode(
259 &mut self,
260 bytes: &[u8],
261 options: &JpegToHtj2kOptions,
262 ) -> Result<EncodedTranscode, JpegToHtj2kError> {
263 let mut accelerator = CpuOnlyDctToWaveletStageAccelerator;
264 self.transcode_with_accelerator(bytes, options, &mut accelerator)
265 }
266
267 pub fn transcode_with_accelerator<A: DctToWaveletStageAccelerator>(
273 &mut self,
274 bytes: &[u8],
275 options: &JpegToHtj2kOptions,
276 accelerator: &mut A,
277 ) -> Result<EncodedTranscode, JpegToHtj2kError> {
278 let mut encode_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
279 self.transcode_with_accelerators(bytes, options, accelerator, &mut encode_accelerator)
280 }
281
282 pub fn transcode_with_accelerators<
285 A: DctToWaveletStageAccelerator,
286 E: J2kEncodeStageAccelerator,
287 >(
288 &mut self,
289 bytes: &[u8],
290 options: &JpegToHtj2kOptions,
291 transform_accelerator: &mut A,
292 encode_accelerator: &mut E,
293 ) -> Result<EncodedTranscode, JpegToHtj2kError> {
294 jpeg_to_htj2k_with_scratch(
295 bytes,
296 options,
297 &mut self.scratch,
298 transform_accelerator,
299 encode_accelerator,
300 )
301 }
302
303 pub fn transcode_batch(
307 &mut self,
308 tiles: &[JpegTileBatchInput<'_>],
309 options: &JpegToHtj2kOptions,
310 ) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
311 let mut accelerator = CpuOnlyDctToWaveletStageAccelerator;
312 self.transcode_batch_with_accelerator(tiles, options, &mut accelerator)
313 }
314
315 pub fn transcode_batch_with_accelerator<A: DctToWaveletStageAccelerator>(
317 &mut self,
318 tiles: &[JpegTileBatchInput<'_>],
319 options: &JpegToHtj2kOptions,
320 accelerator: &mut A,
321 ) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
322 let mut encode_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
323 self.transcode_batch_with_accelerators(tiles, options, accelerator, &mut encode_accelerator)
324 }
325
326 pub fn transcode_batch_with_accelerators<
329 A: DctToWaveletStageAccelerator,
330 E: J2kEncodeStageAccelerator,
331 >(
332 &mut self,
333 tiles: &[JpegTileBatchInput<'_>],
334 options: &JpegToHtj2kOptions,
335 transform_accelerator: &mut A,
336 encode_accelerator: &mut E,
337 ) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
338 jpeg_tile_batch_to_htj2k_with_scratch(
339 tiles,
340 options,
341 &mut self.scratch,
342 transform_accelerator,
343 encode_accelerator,
344 )
345 }
346
347 #[must_use]
352 pub fn dct_block_scratch_capacity(&self) -> usize {
353 self.scratch.dct_blocks_f64.capacity()
354 }
355
356 #[must_use]
361 pub fn integer_idct_block_scratch_capacity(&self) -> usize {
362 self.scratch.integer_idct_blocks.capacity()
363 }
364}
365
/// Reusable working buffers owned by `JpegToHtj2kTranscoder` and passed by
/// mutable reference through the transcode pipeline.
#[derive(Debug, Default)]
struct JpegToHtj2kScratch {
    /// 8x8 blocks of `f64` values; capacity is exposed by
    /// `JpegToHtj2kTranscoder::dct_block_scratch_capacity`.
    dct_blocks_f64: Vec<[[f64; 8]; 8]>,
    /// Scratch for the 5/3 DCT-grid transform.
    dct53_grid: Dct53GridScratch,
    /// Scratch for the 9/7 DCT-grid transform.
    dct97_grid: Dct97GridScratch,
    /// Optional 64-sample integer blocks; capacity is exposed by
    /// `JpegToHtj2kTranscoder::integer_idct_block_scratch_capacity`.
    integer_idct_blocks: Vec<Option<[i32; 64]>>,
    /// NOTE(review): presumably a single row of integer samples — confirm at the use site.
    integer_row: Vec<i32>,
}
374
/// Result of transcoding a single JPEG input.
#[derive(Debug, Clone)]
pub struct EncodedTranscode {
    /// Encoded output bytes.
    pub codestream: Vec<u8>,
    /// Geometry, validation and timing details for this transcode.
    pub report: TranscodeReport,
}
383
/// One tile of a batch transcode request.
#[derive(Debug, Clone, Copy)]
pub struct JpegTileBatchInput<'a> {
    /// Borrowed input bytes; the batch paths pass them to `extract_dct_blocks`.
    pub bytes: &'a [u8],
}
390
/// Result of a batch transcode: one outcome per input tile plus an aggregate report.
#[derive(Debug)]
pub struct EncodedTranscodeBatch {
    /// Per-tile outcomes, in the same order as the input tiles.
    pub tiles: Vec<Result<EncodedTranscode, JpegToHtj2kError>>,
    /// Aggregate counters and timings for the whole batch.
    pub report: BatchTranscodeReport,
}
400
/// Aggregate counters and timings for one batch transcode.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BatchTranscodeReport {
    /// Number of input tiles.
    pub tile_count: usize,
    /// Number of `Ok` tile results; overwritten by `batch_output`.
    pub successful_tiles: usize,
    /// Number of tile results that are not `Ok`; overwritten by `batch_output`.
    pub failed_tiles: usize,
    /// Number of component jobs reported by the transform stage.
    pub transformed_components: usize,
    /// Number of 5/3 component groups processed; zero outside the integer batch path.
    pub reversible_dwt53_batches: usize,
    /// Number of component jobs across those groups; zero outside the integer batch path.
    pub reversible_dwt53_batch_jobs: usize,
    /// Extraction time in microseconds.
    pub extract_us: u128,
    /// Transform time in microseconds.
    pub transform_us: u128,
    /// Encode time in microseconds.
    pub encode_us: u128,
    /// Detailed timing and dispatch counters.
    pub timings: TranscodeTimingReport,
    /// Coefficient path copied from the request options.
    pub coefficient_path: JpegToHtj2kCoefficientPath,
}
428
/// Timing and dispatch counters collected during a transcode.
///
/// Fields ending in `_us` hold microsecond totals (the values written in the
/// visible code come from `Duration::as_micros`); the `usize` fields are counters.
/// `add_assign` sums every field with saturating addition.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct TranscodeTimingReport {
    pub source_raw_probe_us: u128,
    pub read_region_decode_us: u128,
    pub compose_pad_us: u128,
    pub generated_jpeg_encode_us: u128,
    /// Time spent extracting DCT blocks from the JPEG input.
    pub jpeg_dct_extract_us: u128,
    pub jpeg_dct_repack_us: u128,
    /// Total time of the DCT-to-wavelet transform stage.
    pub dct_to_wavelet_total_us: u128,
    pub dct_to_wavelet_accelerator_us: u128,
    pub dct_to_wavelet_cpu_fallback_us: u128,
    pub dwt_decompose_us: u128,
    pub dwt97_batch_pack_upload_us: u128,
    pub dwt97_batch_idct_row_lift_us: u128,
    pub dwt97_batch_column_lift_us: u128,
    pub dwt97_batch_quantize_codeblock_us: u128,
    pub dwt97_batch_ht_encode_us: u128,
    pub dwt97_batch_ht_kernel_us: u128,
    pub dwt97_batch_ht_status_readback_us: u128,
    pub dwt97_batch_ht_compact_us: u128,
    pub dwt97_batch_ht_output_readback_us: u128,
    pub dwt97_batch_ht_codeblock_dispatches: usize,
    pub dwt97_batch_readback_us: u128,
    /// Total time of the HTJ2K encode stage.
    pub htj2k_encode_us: u128,
    pub htj2k_encode_accelerator_dispatches: usize,
    pub htj2k_encode_ht_code_block_dispatches: usize,
    pub htj2k_encode_packetization_dispatches: usize,
    pub dicom_spool_write_us: u128,
    pub dicom_final_write_us: u128,
    /// Number of tiles covered by this report.
    pub tile_count: usize,
    pub component_count: usize,
    pub batch_count: usize,
    pub batch_jobs: usize,
    pub accelerator_attempts: usize,
    pub accelerator_jobs: usize,
    pub accelerator_dispatches: usize,
    pub accelerator_dispatched_jobs: usize,
    pub cpu_fallback_jobs: usize,
}
510
511impl TranscodeTimingReport {
512 fn add_assign(&mut self, other: Self) {
513 macro_rules! saturating_add_fields {
514 ($($field:ident),+ $(,)?) => {
515 $(
516 self.$field = self.$field.saturating_add(other.$field);
517 )+
518 };
519 }
520
521 saturating_add_fields!(
522 source_raw_probe_us,
523 read_region_decode_us,
524 compose_pad_us,
525 generated_jpeg_encode_us,
526 jpeg_dct_extract_us,
527 jpeg_dct_repack_us,
528 dct_to_wavelet_total_us,
529 dct_to_wavelet_accelerator_us,
530 dct_to_wavelet_cpu_fallback_us,
531 dwt_decompose_us,
532 dwt97_batch_pack_upload_us,
533 dwt97_batch_idct_row_lift_us,
534 dwt97_batch_column_lift_us,
535 dwt97_batch_quantize_codeblock_us,
536 dwt97_batch_ht_encode_us,
537 dwt97_batch_ht_kernel_us,
538 dwt97_batch_ht_status_readback_us,
539 dwt97_batch_ht_compact_us,
540 dwt97_batch_ht_output_readback_us,
541 dwt97_batch_ht_codeblock_dispatches,
542 dwt97_batch_readback_us,
543 htj2k_encode_us,
544 htj2k_encode_accelerator_dispatches,
545 htj2k_encode_ht_code_block_dispatches,
546 htj2k_encode_packetization_dispatches,
547 dicom_spool_write_us,
548 dicom_final_write_us,
549 tile_count,
550 component_count,
551 batch_count,
552 batch_jobs,
553 accelerator_attempts,
554 accelerator_jobs,
555 accelerator_dispatches,
556 accelerator_dispatched_jobs,
557 cpu_fallback_jobs,
558 );
559 }
560}
561
/// Geometry of one JPEG component as seen by the transcoder.
///
/// The batch preparation functions copy the first five fields from the extracted
/// JPEG component and take `x_rsiz`/`y_rsiz` from the derived component sampling.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscodeComponentReport {
    /// Component index reported by the JPEG extractor.
    pub component_index: usize,
    /// Component width.
    pub width: u32,
    /// Component height.
    pub height: u32,
    /// Number of block columns.
    pub block_cols: u32,
    /// Number of block rows.
    pub block_rows: u32,
    /// Horizontal sampling value; 1 means unit sampling.
    pub x_rsiz: u8,
    /// Vertical sampling value; 1 means unit sampling.
    pub y_rsiz: u8,
}
580
/// Validation metrics attached to a transcode report; an alias for [`ErrorMetrics`].
pub type TranscodeValidationMetrics = ErrorMetrics;
583
/// Coarse verdict derived from validation metrics by
/// [`TranscodeValidationClassification::classify_metrics`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TranscodeValidationClassification {
    /// Every sample matched and the maximum absolute error is zero.
    Exact,
    /// Not exact, but `is_one_lsb_bounded(0.999)` holds for the metrics.
    OneLsbBounded,
    /// Neither of the above.
    OutsideThreshold,
}
596
597impl TranscodeValidationClassification {
598 #[must_use]
601 pub fn classify_metrics(metrics: &TranscodeValidationMetrics) -> Self {
602 if metrics.exact_matches == metrics.total && metrics.max_abs_error == 0 {
603 Self::Exact
604 } else if metrics.is_one_lsb_bounded(0.999) {
605 Self::OneLsbBounded
606 } else {
607 Self::OutsideThreshold
608 }
609 }
610}
611
/// Per-transcode report: geometry, optional validation results and timings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscodeReport {
    /// Image width.
    pub width: u32,
    /// Image height.
    pub height: u32,
    /// Number of components.
    pub component_count: usize,
    /// Per-component geometry.
    pub components: Vec<TranscodeComponentReport>,
    /// Metrics against the float reference, when available.
    pub float_reference_metrics: Option<TranscodeValidationMetrics>,
    /// Classification of the float-reference metrics, when available.
    pub float_reference_classification: Option<TranscodeValidationClassification>,
    /// Metrics against the integer reference, when available.
    pub integer_reference_metrics: Option<TranscodeValidationMetrics>,
    /// Classification of the integer-reference metrics, when available.
    pub integer_reference_classification: Option<TranscodeValidationClassification>,
    /// Decomposition level count used for this transcode.
    pub decomposition_levels: u8,
    /// Coefficient path used for this transcode.
    pub coefficient_path: JpegToHtj2kCoefficientPath,
    /// Static label for the code path taken.
    /// NOTE(review): the set of possible values is not visible here.
    pub path: &'static str,
    /// Extraction time in microseconds.
    pub extract_us: u128,
    /// Transform time in microseconds.
    pub transform_us: u128,
    /// Encode time in microseconds.
    pub encode_us: u128,
    /// Detailed timing and dispatch counters.
    pub timings: TranscodeTimingReport,
}
648
/// Errors produced by the JPEG-to-HTJ2K transcode.
#[derive(Debug)]
pub enum JpegToHtj2kError {
    /// JPEG extraction failed; the only variant that exposes an error `source`.
    Jpeg(j2k_jpeg::JpegError),
    /// The input is not supported by the transcoder.
    Unsupported(&'static str),
    /// A 5/3 DCT-grid transform error, stored as its display text.
    Grid(String),
    /// A 9/7 DCT-grid transform error, stored as its display text; displayed with
    /// the same prefix as `Grid`.
    Grid97(String),
    /// The transform accelerator reported an error.
    Accelerator(TranscodeStageError),
    /// Validation metrics could not be computed; holds the display text of the cause.
    Metrics(String),
    /// A validation step failed.
    Validation(&'static str),
    /// HTJ2K encoding failed.
    Encode(&'static str),
}
670
671impl fmt::Display for JpegToHtj2kError {
672 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
673 match self {
674 Self::Jpeg(err) => write!(f, "JPEG extraction failed: {err}"),
675 Self::Unsupported(reason) => write!(f, "unsupported transcode input: {reason}"),
676 Self::Grid(reason) | Self::Grid97(reason) => {
677 write!(f, "DCT grid transform failed: {reason}")
678 }
679 Self::Accelerator(reason) => write!(f, "transform accelerator failed: {reason}"),
680 Self::Metrics(reason) => write!(f, "validation metrics failed: {reason}"),
681 Self::Validation(reason) => write!(f, "validation failed: {reason}"),
682 Self::Encode(reason) => write!(f, "HTJ2K encode failed: {reason}"),
683 }
684 }
685}
686
687impl std::error::Error for JpegToHtj2kError {
688 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
689 match self {
690 Self::Jpeg(err) => Some(err),
691 Self::Unsupported(_)
692 | Self::Grid(_)
693 | Self::Grid97(_)
694 | Self::Accelerator(_)
695 | Self::Metrics(_)
696 | Self::Validation(_)
697 | Self::Encode(_) => None,
698 }
699 }
700}
701
702impl From<j2k_jpeg::JpegError> for JpegToHtj2kError {
703 fn from(value: j2k_jpeg::JpegError) -> Self {
704 Self::Jpeg(value)
705 }
706}
707
708fn dct53_grid_error(value: DctGridError) -> JpegToHtj2kError {
709 JpegToHtj2kError::Grid(value.to_string())
710}
711
712fn dct97_grid_error(value: DctGridError) -> JpegToHtj2kError {
713 JpegToHtj2kError::Grid97(value.to_string())
714}
715
716impl From<MetricsLengthError> for JpegToHtj2kError {
717 fn from(value: MetricsLengthError) -> Self {
718 Self::Metrics(value.to_string())
719 }
720}
721
722pub fn jpeg_to_htj2k(
729 bytes: &[u8],
730 options: &JpegToHtj2kOptions,
731) -> Result<EncodedTranscode, JpegToHtj2kError> {
732 JpegToHtj2kTranscoder::default().transcode(bytes, options)
733}
734
735pub fn jpeg_to_htj2k_batch(
737 tiles: &[JpegTileBatchInput<'_>],
738 options: &JpegToHtj2kOptions,
739) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
740 JpegToHtj2kTranscoder::default().transcode_batch(tiles, options)
741}
742
743fn jpeg_tile_batch_to_htj2k_with_scratch<
744 A: DctToWaveletStageAccelerator,
745 E: J2kEncodeStageAccelerator,
746>(
747 tiles: &[JpegTileBatchInput<'_>],
748 options: &JpegToHtj2kOptions,
749 scratch: &mut JpegToHtj2kScratch,
750 accelerator: &mut A,
751 encode_accelerator: &mut E,
752) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
753 validate_transcode_options(options)?;
754 match options.coefficient_path {
755 JpegToHtj2kCoefficientPath::IntegerDirect53 => {}
756 JpegToHtj2kCoefficientPath::FloatDirectLinear97
757 if accelerator.supports_dwt97_batch()
758 || accelerator.supports_htj2k97_codeblock_batch() =>
759 {
760 return jpeg_float97_tile_batch_to_htj2k_with_scratch(
761 tiles,
762 options,
763 scratch,
764 accelerator,
765 encode_accelerator,
766 );
767 }
768 JpegToHtj2kCoefficientPath::FloatDirectLinear53
769 | JpegToHtj2kCoefficientPath::FloatDirectLinear97 => {
770 return Ok(transcode_tile_batch_individually(
771 tiles,
772 options,
773 scratch,
774 accelerator,
775 encode_accelerator,
776 ));
777 }
778 }
779
780 let extract_start = Instant::now();
781 let prepared_results = tiles
782 .par_iter()
783 .enumerate()
784 .map(|(tile_index, tile)| {
785 (
786 tile_index,
787 prepare_integer_batch_tile(tile_index, tile.bytes, options),
788 )
789 })
790 .collect::<Vec<_>>();
791 let extract_us = extract_start.elapsed().as_micros();
792 let mut tile_results: Vec<Option<Result<EncodedTranscode, JpegToHtj2kError>>> =
793 (0..tiles.len()).map(|_| None).collect();
794 let mut prepared_tiles = Vec::new();
795 for (tile_index, result) in prepared_results {
796 match result {
797 Ok(prepared) => prepared_tiles.push(prepared),
798 Err(error) => tile_results[tile_index] = Some(Err(error)),
799 }
800 }
801
802 let transform_start = Instant::now();
803 let mut timings = TranscodeTimingReport::default();
804 let (reversible_dwt53_batches, reversible_dwt53_batch_jobs) = transform_integer_batch_tiles(
805 &mut prepared_tiles,
806 options,
807 scratch,
808 accelerator,
809 &mut timings,
810 )?;
811 let transform_us = transform_start.elapsed().as_micros();
812 timings.jpeg_dct_extract_us = extract_us;
813 timings.dct_to_wavelet_total_us = transform_us;
814 timings.tile_count = prepared_tiles.len();
815
816 let encode_start = Instant::now();
817 let encoded_tiles = encode_integer_prepared_tiles(prepared_tiles, options, encode_accelerator);
818 for (tile_index, encoded) in encoded_tiles {
819 add_encode_timing_counters_from_result(&mut timings, &encoded);
820 tile_results[tile_index] = Some(encoded);
821 }
822 let encode_us = encode_start.elapsed().as_micros();
823 timings.htj2k_encode_us = encode_us;
824
825 let output_tiles = tile_results
826 .into_iter()
827 .map(|tile| {
828 tile.unwrap_or(Err(JpegToHtj2kError::Validation(
829 "batch transcode did not produce a tile result",
830 )))
831 })
832 .collect::<Vec<_>>();
833 Ok(batch_output(
834 output_tiles,
835 BatchTranscodeReport {
836 tile_count: tiles.len(),
837 successful_tiles: 0,
838 failed_tiles: 0,
839 transformed_components: reversible_dwt53_batch_jobs,
840 reversible_dwt53_batches,
841 reversible_dwt53_batch_jobs,
842 extract_us,
843 transform_us,
844 encode_us,
845 timings,
846 coefficient_path: options.coefficient_path,
847 },
848 ))
849}
850
851fn jpeg_float97_tile_batch_to_htj2k_with_scratch<
852 A: DctToWaveletStageAccelerator,
853 E: J2kEncodeStageAccelerator,
854>(
855 tiles: &[JpegTileBatchInput<'_>],
856 options: &JpegToHtj2kOptions,
857 scratch: &mut JpegToHtj2kScratch,
858 accelerator: &mut A,
859 encode_accelerator: &mut E,
860) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
861 let extract_start = Instant::now();
862 let prepared_results = tiles
863 .par_iter()
864 .enumerate()
865 .map(|(tile_index, tile)| {
866 (
867 tile_index,
868 prepare_float97_batch_tile(tile_index, tile.bytes, options),
869 )
870 })
871 .collect::<Vec<_>>();
872 let extract_us = extract_start.elapsed().as_micros();
873 let mut tile_results: Vec<Option<Result<EncodedTranscode, JpegToHtj2kError>>> =
874 (0..tiles.len()).map(|_| None).collect();
875 let mut prepared_tiles = Vec::new();
876 for (tile_index, result) in prepared_results {
877 match result {
878 Ok(prepared) => prepared_tiles.push(prepared),
879 Err(error) => tile_results[tile_index] = Some(Err(error)),
880 }
881 }
882
883 let transform_start = Instant::now();
884 let mut timings = TranscodeTimingReport::default();
885 let (_dwt97_batches, dwt97_batch_jobs) = transform_float97_batch_tiles(
886 &mut prepared_tiles,
887 options,
888 scratch,
889 accelerator,
890 &mut timings,
891 )?;
892 let transform_us = transform_start.elapsed().as_micros();
893 timings.jpeg_dct_extract_us = extract_us;
894 timings.dct_to_wavelet_total_us = transform_us;
895 timings.tile_count = prepared_tiles.len();
896
897 let encode_start = Instant::now();
898 let encoded_tiles = encode_float97_prepared_tiles(prepared_tiles, options, encode_accelerator);
899 for (tile_index, encoded) in encoded_tiles {
900 add_encode_timing_counters_from_result(&mut timings, &encoded);
901 tile_results[tile_index] = Some(encoded);
902 }
903 let encode_us = encode_start.elapsed().as_micros();
904 timings.htj2k_encode_us = encode_us;
905
906 let output_tiles = tile_results
907 .into_iter()
908 .map(|tile| {
909 tile.unwrap_or(Err(JpegToHtj2kError::Validation(
910 "9/7 batch transcode did not produce a tile result",
911 )))
912 })
913 .collect::<Vec<_>>();
914 Ok(batch_output(
915 output_tiles,
916 BatchTranscodeReport {
917 tile_count: tiles.len(),
918 successful_tiles: 0,
919 failed_tiles: 0,
920 transformed_components: dwt97_batch_jobs,
921 reversible_dwt53_batches: 0,
922 reversible_dwt53_batch_jobs: 0,
923 extract_us,
924 transform_us,
925 encode_us,
926 timings,
927 coefficient_path: options.coefficient_path,
928 },
929 ))
930}
931
932fn transcode_tile_batch_individually<
933 A: DctToWaveletStageAccelerator,
934 E: J2kEncodeStageAccelerator,
935>(
936 tiles: &[JpegTileBatchInput<'_>],
937 options: &JpegToHtj2kOptions,
938 scratch: &mut JpegToHtj2kScratch,
939 accelerator: &mut A,
940 encode_accelerator: &mut E,
941) -> EncodedTranscodeBatch {
942 let start = Instant::now();
943 let output_tiles = tiles
944 .iter()
945 .map(|tile| {
946 jpeg_to_htj2k_with_scratch(
947 tile.bytes,
948 options,
949 scratch,
950 accelerator,
951 encode_accelerator,
952 )
953 })
954 .collect::<Vec<_>>();
955 let mut timings = aggregate_tile_timings(&output_tiles);
956 timings.tile_count = output_tiles.iter().filter(|tile| tile.is_ok()).count();
957 let elapsed_us = start.elapsed().as_micros();
958 if timings.dct_to_wavelet_total_us == 0 {
959 timings.dct_to_wavelet_total_us = elapsed_us
960 .saturating_sub(timings.jpeg_dct_extract_us)
961 .saturating_sub(timings.htj2k_encode_us);
962 }
963 batch_output(
964 output_tiles,
965 BatchTranscodeReport {
966 tile_count: tiles.len(),
967 successful_tiles: 0,
968 failed_tiles: 0,
969 transformed_components: timings.component_count,
970 reversible_dwt53_batches: 0,
971 reversible_dwt53_batch_jobs: 0,
972 extract_us: timings.jpeg_dct_extract_us,
973 transform_us: timings.dct_to_wavelet_total_us,
974 encode_us: timings.htj2k_encode_us,
975 timings,
976 coefficient_path: options.coefficient_path,
977 },
978 )
979}
980
981fn aggregate_tile_timings(
982 tiles: &[Result<EncodedTranscode, JpegToHtj2kError>],
983) -> TranscodeTimingReport {
984 let mut timings = TranscodeTimingReport::default();
985 for tile in tiles.iter().filter_map(|tile| tile.as_ref().ok()) {
986 timings.add_assign(tile.report.timings);
987 }
988 timings
989}
990
991fn batch_output(
992 tiles: Vec<Result<EncodedTranscode, JpegToHtj2kError>>,
993 mut report: BatchTranscodeReport,
994) -> EncodedTranscodeBatch {
995 report.successful_tiles = tiles.iter().filter(|tile| tile.is_ok()).count();
996 report.failed_tiles = tiles.len().saturating_sub(report.successful_tiles);
997 EncodedTranscodeBatch { tiles, report }
998}
999
/// Per-tile state carried through the integer (5/3) batch pipeline.
struct IntegerBatchTile {
    /// Index of this tile in the caller's input slice.
    tile_index: usize,
    /// DCT blocks extracted from the tile's JPEG bytes.
    jpeg: JpegDctImage,
    /// One `(x_rsiz, y_rsiz)` pair per component.
    component_sampling: Vec<(u8, u8)>,
    /// Decomposition level count resolved for this tile's components.
    decomposition_levels: u8,
    /// `true` when every sampling pair is `(1, 1)`.
    all_unit_sampled: bool,
    /// Geometry report per component.
    component_reports: Vec<TranscodeComponentReport>,
    /// One slot per component, `None` when the tile is first prepared.
    precomputed_components: Vec<Option<PrecomputedHtj2k53Component>>,
    /// Starts empty when the tile is prepared.
    float_validation_actual: Vec<i32>,
    /// Starts empty when the tile is prepared.
    float_validation_expected: Vec<i32>,
    /// Starts empty when the tile is prepared.
    integer_validation_actual: Vec<i32>,
    /// Starts empty when the tile is prepared.
    integer_validation_expected: Vec<i32>,
    /// Per-tile timings; preparation records the extract time and a tile count of 1.
    timings: TranscodeTimingReport,
}
1014
/// Per-tile state carried through the 9/7 batch pipeline.
struct Float97BatchTile {
    /// Index of this tile in the caller's input slice.
    tile_index: usize,
    /// DCT blocks extracted from the tile's JPEG bytes.
    jpeg: JpegDctImage,
    /// One `(x_rsiz, y_rsiz)` pair per component.
    component_sampling: Vec<(u8, u8)>,
    /// Decomposition level count resolved for this tile's components.
    decomposition_levels: u8,
    /// `true` when every sampling pair is `(1, 1)`.
    all_unit_sampled: bool,
    /// Geometry report per component.
    component_reports: Vec<TranscodeComponentReport>,
    /// One slot per component, `None` when the tile is first prepared.
    precomputed_components: Vec<Option<PrecomputedHtj2k97Component>>,
    /// Starts empty when the tile is prepared.
    preencoded_compact_payload: Vec<u8>,
    /// One slot per component, `None` when the tile is first prepared.
    preencoded_compact_components: Vec<Option<PreencodedHtj2k97CompactComponent>>,
    /// One slot per component, `None` when the tile is first prepared.
    preencoded_components: Vec<Option<PreencodedHtj2k97Component>>,
    /// One slot per component, `None` when the tile is first prepared.
    prequantized_components: Vec<Option<PrequantizedHtj2k97Component>>,
    /// Starts empty when the tile is prepared.
    float_validation_actual: Vec<i32>,
    /// Starts empty when the tile is prepared.
    float_validation_expected: Vec<i32>,
    /// Per-tile timings; preparation records the extract time and a tile count of 1.
    timings: TranscodeTimingReport,
}
1031
/// Bookkeeping for a 9/7 tile; its fields are a subset of `Float97BatchTile`'s
/// (identity, geometry, validation buffers and timings).
/// NOTE(review): its use site is outside this part of the file — presumably kept
/// after the tile's components are handed to a batched encode; confirm there.
struct Float97PrecomputedBatchRecord {
    tile_index: usize,
    jpeg: JpegDctImage,
    decomposition_levels: u8,
    all_unit_sampled: bool,
    component_reports: Vec<TranscodeComponentReport>,
    float_validation_actual: Vec<i32>,
    float_validation_expected: Vec<i32>,
    timings: TranscodeTimingReport,
}
1042
/// Identifies one component of one tile inside a batch group.
#[derive(Clone, Copy)]
struct BatchComponentRef {
    /// Position of the tile in the prepared-tile slice (as assigned by
    /// `batch_component_groups`), not the caller's original tile index.
    tile_index: usize,
    /// Position of the component within that tile's `jpeg.components`.
    component_index: usize,
}
1048
1049fn prepare_integer_batch_tile(
1050 tile_index: usize,
1051 bytes: &[u8],
1052 options: &JpegToHtj2kOptions,
1053) -> Result<IntegerBatchTile, JpegToHtj2kError> {
1054 let extract_start = Instant::now();
1055 let jpeg = extract_dct_blocks(bytes, DctExtractOptions::default())?;
1056 let timings = TranscodeTimingReport {
1057 jpeg_dct_extract_us: extract_start.elapsed().as_micros(),
1058 tile_count: 1,
1059 ..TranscodeTimingReport::default()
1060 };
1061 if jpeg.components.is_empty() || jpeg.components.len() > 4 {
1062 return Err(JpegToHtj2kError::Unsupported(
1063 "unsupported JPEG component count for jpeg_to_htj2k",
1064 ));
1065 }
1066 let component_sampling =
1067 component_sampling_for_jpeg(&jpeg.components, jpeg.width, jpeg.height)?;
1068 let decomposition_levels = decomposition_levels_for_components(
1069 &jpeg.components,
1070 options.encode_options.num_decomposition_levels,
1071 )?;
1072 let all_unit_sampled = component_sampling
1073 .iter()
1074 .all(|&(x_rsiz, y_rsiz)| x_rsiz == 1 && y_rsiz == 1);
1075 let component_reports = jpeg
1076 .components
1077 .iter()
1078 .zip(component_sampling.iter().copied())
1079 .map(|(component, (x_rsiz, y_rsiz))| TranscodeComponentReport {
1080 component_index: component.component_index,
1081 width: component.width,
1082 height: component.height,
1083 block_cols: component.block_cols,
1084 block_rows: component.block_rows,
1085 x_rsiz,
1086 y_rsiz,
1087 })
1088 .collect::<Vec<_>>();
1089 let precomputed_components = (0..jpeg.components.len()).map(|_| None).collect();
1090
1091 Ok(IntegerBatchTile {
1092 tile_index,
1093 jpeg,
1094 component_sampling,
1095 decomposition_levels,
1096 all_unit_sampled,
1097 component_reports,
1098 precomputed_components,
1099 float_validation_actual: Vec::new(),
1100 float_validation_expected: Vec::new(),
1101 integer_validation_actual: Vec::new(),
1102 integer_validation_expected: Vec::new(),
1103 timings,
1104 })
1105}
1106
1107fn prepare_float97_batch_tile(
1108 tile_index: usize,
1109 bytes: &[u8],
1110 options: &JpegToHtj2kOptions,
1111) -> Result<Float97BatchTile, JpegToHtj2kError> {
1112 let extract_start = Instant::now();
1113 let jpeg = extract_dct_blocks(bytes, DctExtractOptions::dequantized_only())?;
1114 let timings = TranscodeTimingReport {
1115 jpeg_dct_extract_us: extract_start.elapsed().as_micros(),
1116 tile_count: 1,
1117 ..TranscodeTimingReport::default()
1118 };
1119 if jpeg.components.is_empty() || jpeg.components.len() > 4 {
1120 return Err(JpegToHtj2kError::Unsupported(
1121 "unsupported JPEG component count for jpeg_to_htj2k",
1122 ));
1123 }
1124 let component_sampling =
1125 component_sampling_for_jpeg(&jpeg.components, jpeg.width, jpeg.height)?;
1126 let decomposition_levels = decomposition_levels_for_components(
1127 &jpeg.components,
1128 options.encode_options.num_decomposition_levels,
1129 )?;
1130 let all_unit_sampled = component_sampling
1131 .iter()
1132 .all(|&(x_rsiz, y_rsiz)| x_rsiz == 1 && y_rsiz == 1);
1133 let component_reports = jpeg
1134 .components
1135 .iter()
1136 .zip(component_sampling.iter().copied())
1137 .map(|(component, (x_rsiz, y_rsiz))| TranscodeComponentReport {
1138 component_index: component.component_index,
1139 width: component.width,
1140 height: component.height,
1141 block_cols: component.block_cols,
1142 block_rows: component.block_rows,
1143 x_rsiz,
1144 y_rsiz,
1145 })
1146 .collect::<Vec<_>>();
1147 let precomputed_components = (0..jpeg.components.len()).map(|_| None).collect();
1148 let preencoded_compact_components = (0..jpeg.components.len()).map(|_| None).collect();
1149 let preencoded_components = (0..jpeg.components.len()).map(|_| None).collect();
1150 let prequantized_components = (0..jpeg.components.len()).map(|_| None).collect();
1151
1152 Ok(Float97BatchTile {
1153 tile_index,
1154 jpeg,
1155 component_sampling,
1156 decomposition_levels,
1157 all_unit_sampled,
1158 component_reports,
1159 precomputed_components,
1160 preencoded_compact_payload: Vec::new(),
1161 preencoded_compact_components,
1162 preencoded_components,
1163 prequantized_components,
1164 float_validation_actual: Vec::new(),
1165 float_validation_expected: Vec::new(),
1166 timings,
1167 })
1168}
1169
1170fn transform_integer_batch_tiles<A: DctToWaveletStageAccelerator>(
1171 tiles: &mut [IntegerBatchTile],
1172 options: &JpegToHtj2kOptions,
1173 scratch: &mut JpegToHtj2kScratch,
1174 accelerator: &mut A,
1175 timings: &mut TranscodeTimingReport,
1176) -> Result<(usize, usize), JpegToHtj2kError> {
1177 let groups = batch_component_groups(tiles);
1178 let mut batch_count = 0usize;
1179 let mut job_count = 0usize;
1180
1181 for group in groups {
1182 batch_count = batch_count.saturating_add(1);
1183 job_count = job_count.saturating_add(group.len());
1184 let wavelets =
1185 integer_wavelets_for_batch_group(&group, tiles, scratch, accelerator, timings)?;
1186 for (component_ref, wavelet) in group.into_iter().zip(wavelets) {
1187 store_integer_batch_wavelet(component_ref, &wavelet, tiles, options, scratch)?;
1188 }
1189 }
1190
1191 Ok((batch_count, job_count))
1192}
1193
1194fn transform_float97_batch_tiles<A: DctToWaveletStageAccelerator>(
1195 tiles: &mut [Float97BatchTile],
1196 options: &JpegToHtj2kOptions,
1197 scratch: &mut JpegToHtj2kScratch,
1198 accelerator: &mut A,
1199 timings: &mut TranscodeTimingReport,
1200) -> Result<(usize, usize), JpegToHtj2kError> {
1201 let groups = float97_batch_component_groups(tiles);
1202 let grouped_i16_preencoded = try_store_grouped_i16_preencoded_float97_batches(
1203 &groups,
1204 tiles,
1205 options,
1206 accelerator,
1207 timings,
1208 )?;
1209 let mut batch_count = 0usize;
1210 let mut job_count = 0usize;
1211
1212 for (group_index, group) in groups.into_iter().enumerate() {
1213 batch_count = batch_count.saturating_add(1);
1214 job_count = job_count.saturating_add(group.len());
1215 if grouped_i16_preencoded
1216 .get(group_index)
1217 .copied()
1218 .unwrap_or(false)
1219 {
1220 continue;
1221 }
1222 if try_store_prequantized_float97_batch_group(&group, tiles, options, accelerator, timings)?
1223 {
1224 continue;
1225 }
1226 let wavelets =
1227 float97_wavelets_for_batch_group(&group, tiles, scratch, accelerator, timings)?;
1228 for (component_ref, wavelet) in group.into_iter().zip(wavelets) {
1229 store_float97_batch_wavelet(component_ref, &wavelet, tiles, options, scratch)?;
1230 }
1231 }
1232
1233 Ok((batch_count, job_count))
1234}
1235
1236fn batch_component_groups(tiles: &[IntegerBatchTile]) -> Vec<Vec<BatchComponentRef>> {
1237 let mut groups: Vec<Vec<BatchComponentRef>> = Vec::new();
1238
1239 for (tile_index, tile) in tiles.iter().enumerate() {
1240 for (component_index, component) in tile.jpeg.components.iter().enumerate() {
1241 let component_ref = BatchComponentRef {
1242 tile_index,
1243 component_index,
1244 };
1245 if let Some(group) = groups.iter_mut().find(|group| {
1246 let first = group[0];
1247 same_batch_component_key(
1248 &tiles[first.tile_index],
1249 first.component_index,
1250 tile,
1251 component_index,
1252 )
1253 }) {
1254 group.push(component_ref);
1255 } else {
1256 let _ = component;
1257 groups.push(vec![component_ref]);
1258 }
1259 }
1260 }
1261
1262 groups
1263}
1264
1265fn float97_batch_component_groups(tiles: &[Float97BatchTile]) -> Vec<Vec<BatchComponentRef>> {
1266 let mut groups: Vec<Vec<BatchComponentRef>> = Vec::new();
1267
1268 for (tile_index, tile) in tiles.iter().enumerate() {
1269 for component_index in 0..tile.jpeg.components.len() {
1270 let component_ref = BatchComponentRef {
1271 tile_index,
1272 component_index,
1273 };
1274 if let Some(group) = groups.iter_mut().find(|group| {
1275 let first = group[0];
1276 same_float97_batch_component_key(
1277 &tiles[first.tile_index],
1278 first.component_index,
1279 tile,
1280 component_index,
1281 )
1282 }) {
1283 group.push(component_ref);
1284 } else {
1285 groups.push(vec![component_ref]);
1286 }
1287 }
1288 }
1289
1290 groups
1291}
1292
1293fn same_batch_component_key(
1294 left_tile: &IntegerBatchTile,
1295 left_component_index: usize,
1296 right_tile: &IntegerBatchTile,
1297 right_component_index: usize,
1298) -> bool {
1299 let left = &left_tile.jpeg.components[left_component_index];
1300 let right = &right_tile.jpeg.components[right_component_index];
1301 left.component_index == right.component_index
1302 && left.width == right.width
1303 && left.height == right.height
1304 && left.block_cols == right.block_cols
1305 && left.block_rows == right.block_rows
1306 && left_tile.component_sampling[left_component_index]
1307 == right_tile.component_sampling[right_component_index]
1308}
1309
1310fn same_float97_batch_component_key(
1311 left_tile: &Float97BatchTile,
1312 left_component_index: usize,
1313 right_tile: &Float97BatchTile,
1314 right_component_index: usize,
1315) -> bool {
1316 let left = &left_tile.jpeg.components[left_component_index];
1317 let right = &right_tile.jpeg.components[right_component_index];
1318 left.width == right.width
1319 && left.height == right.height
1320 && left.block_cols == right.block_cols
1321 && left.block_rows == right.block_rows
1322 && left_tile.component_sampling[left_component_index]
1323 == right_tile.component_sampling[right_component_index]
1324}
1325
1326fn integer_wavelets_for_batch_group<A: DctToWaveletStageAccelerator>(
1327 group: &[BatchComponentRef],
1328 tiles: &[IntegerBatchTile],
1329 scratch: &mut JpegToHtj2kScratch,
1330 accelerator: &mut A,
1331 timings: &mut TranscodeTimingReport,
1332) -> Result<Vec<IntegerWavelet>, JpegToHtj2kError> {
1333 let jobs = group
1334 .iter()
1335 .map(|component_ref| {
1336 integer_dct_job_for_component(
1337 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index],
1338 )
1339 })
1340 .collect::<Result<Vec<_>, _>>()?;
1341 record_batch_attempt(timings, group.len());
1342 let accelerator_start = Instant::now();
1343 let accelerated = accelerator
1344 .dct_grid_to_reversible_dwt53_batch(&jobs)
1345 .map_err(JpegToHtj2kError::Accelerator)?;
1346 timings.dct_to_wavelet_accelerator_us = timings
1347 .dct_to_wavelet_accelerator_us
1348 .saturating_add(accelerator_start.elapsed().as_micros());
1349
1350 if let Some(first_levels) = accelerated {
1351 if first_levels.len() != group.len() {
1352 return Err(JpegToHtj2kError::Validation(
1353 "reversible 5/3 batch accelerator returned wrong component count",
1354 ));
1355 }
1356 timings.component_count = timings.component_count.saturating_add(group.len());
1357 record_accelerator_dispatch(timings, group.len());
1358 let decompose_start = Instant::now();
1359 let wavelets = first_levels
1360 .into_iter()
1361 .zip(group.iter().copied())
1362 .map(|(first_level, component_ref)| {
1363 integer_wavelet_from_first_level(
1364 first_level,
1365 tiles[component_ref.tile_index].decomposition_levels,
1366 )
1367 })
1368 .collect();
1369 timings.dwt_decompose_us = timings
1370 .dwt_decompose_us
1371 .saturating_add(decompose_start.elapsed().as_micros());
1372 return Ok(wavelets);
1373 }
1374
1375 group
1376 .iter()
1377 .map(|component_ref| {
1378 integer_direct_wavelet_from_component(
1379 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index],
1380 tiles[component_ref.tile_index].decomposition_levels,
1381 scratch,
1382 accelerator,
1383 timings,
1384 )
1385 })
1386 .collect()
1387}
1388
1389fn i16_htj2k97_jobs_for_batch_group<'a>(
1390 group: &[BatchComponentRef],
1391 tiles: &'a [Float97BatchTile],
1392) -> Result<Vec<DctGridI16ToHtj2k97CodeBlockJob<'a>>, JpegToHtj2kError> {
1393 group
1394 .iter()
1395 .map(|component_ref| {
1396 let tile = &tiles[component_ref.tile_index];
1397 let component = &tile.jpeg.components[component_ref.component_index];
1398 let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
1399 validate_component_block_grid(component)?;
1400 Ok(DctGridI16ToHtj2k97CodeBlockJob {
1401 dequantized_blocks: &component.dequantized_blocks,
1402 block_cols: component.block_cols as usize,
1403 block_rows: component.block_rows as usize,
1404 width: component.width as usize,
1405 height: component.height as usize,
1406 x_rsiz,
1407 y_rsiz,
1408 })
1409 })
1410 .collect()
1411}
1412
1413fn store_compact_preencoded_component(
1414 tile: &mut Float97BatchTile,
1415 component_index: usize,
1416 batch_payload: &[u8],
1417 mut component: PreencodedHtj2k97CompactComponent,
1418) -> Result<(), JpegToHtj2kError> {
1419 if component_index >= tile.preencoded_compact_components.len() {
1420 return Err(JpegToHtj2kError::Validation(
1421 "compact preencoded component index out of range",
1422 ));
1423 }
1424
1425 for resolution in &mut component.resolutions {
1426 for subband in &mut resolution.subbands {
1427 for block in &mut subband.code_blocks {
1428 if block.payload_range.start > block.payload_range.end
1429 || block.payload_range.end > batch_payload.len()
1430 {
1431 return Err(JpegToHtj2kError::Validation(
1432 "compact preencoded payload range out of bounds",
1433 ));
1434 }
1435 let start = tile.preencoded_compact_payload.len();
1436 tile.preencoded_compact_payload
1437 .extend_from_slice(&batch_payload[block.payload_range.clone()]);
1438 let end = tile.preencoded_compact_payload.len();
1439 block.payload_range = start..end;
1440 }
1441 }
1442 }
1443
1444 tile.preencoded_compact_components[component_index] = Some(component);
1445 Ok(())
1446}
1447
/// Tries to pre-encode several component groups with a single grouped i16
/// accelerator call and stores the results directly into the tiles.
///
/// Returns one flag per entry of `groups`; `true` means that group's
/// components were stored here and need no further processing by the caller.
///
/// The function gives up early (all flags `false`) when the accelerator does
/// not advertise i16 pre-encoded batches, when float-reference validation is
/// requested, or when fewer than two groups exist or are eligible. A group is
/// eligible when every member's tile uses exactly one decomposition level.
///
/// The compact pre-encoded entry point is tried first when the accelerator
/// advertises it; if it is unavailable or returns `None`, the non-compact
/// grouped entry point is used.
///
/// # Errors
///
/// Accelerator failures are wrapped as `JpegToHtj2kError::Accelerator`.
/// `JpegToHtj2kError::Validation` is returned when the accelerator hands back
/// the wrong number of groups or components, or when storing a compact
/// component fails. Job construction errors are propagated unchanged.
#[allow(clippy::too_many_lines)]
fn try_store_grouped_i16_preencoded_float97_batches<A: DctToWaveletStageAccelerator>(
    groups: &[Vec<BatchComponentRef>],
    tiles: &mut [Float97BatchTile],
    options: &JpegToHtj2kOptions,
    accelerator: &mut A,
    timings: &mut TranscodeTimingReport,
) -> Result<Vec<bool>, JpegToHtj2kError> {
    let mut handled = vec![false; groups.len()];
    // Nothing to do without accelerator support, with validation enabled, or
    // when there is at most one group to combine.
    if !accelerator.supports_htj2k97_i16_preencoded_batch()
        || options.validate_against_float_reference
        || groups.len() <= 1
    {
        return Ok(handled);
    }

    // Keep only groups whose members all use a single decomposition level.
    let eligible_indices = groups
        .iter()
        .enumerate()
        .filter_map(|(index, group)| {
            let eligible = group
                .iter()
                .all(|component_ref| tiles[component_ref.tile_index].decomposition_levels == 1);
            eligible.then_some(index)
        })
        .collect::<Vec<_>>();
    if eligible_indices.len() <= 1 {
        return Ok(handled);
    }

    let codeblock_options = htj2k97_codeblock_options(&options.encode_options);
    let total_jobs = eligible_indices
        .iter()
        .map(|&index| groups[index].len())
        .sum::<usize>();
    record_accelerator_attempt(timings, total_jobs);
    // The timer starts before job construction, so that cost is included in
    // the accelerator time recorded below.
    let accelerator_start = Instant::now();
    let jobs_by_group = eligible_indices
        .iter()
        .map(|&index| i16_htj2k97_jobs_for_batch_group(&groups[index], tiles))
        .collect::<Result<Vec<_>, JpegToHtj2kError>>()?;
    let batches = jobs_by_group
        .iter()
        .map(|jobs| DctGridI16ToHtj2k97CodeBlockBatch { jobs })
        .collect::<Vec<_>>();
    // First choice: the compact pre-encoded entry point, if advertised.
    let compact_grouped_components = if accelerator.supports_htj2k97_compact_preencoded_batch() {
        accelerator
            .dct_grid_i16_to_htj2k97_compact_preencoded_batch_groups(&batches, codeblock_options)
            .map_err(JpegToHtj2kError::Accelerator)?
    } else {
        None
    };
    // Stage timings are folded in here whether or not the compact call ran or
    // produced a result.
    if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
        add_dwt97_batch_stage_timings(timings, stage_timings);
    }
    if let Some(compact_grouped_components) = compact_grouped_components {
        timings.dct_to_wavelet_accelerator_us = timings
            .dct_to_wavelet_accelerator_us
            .saturating_add(accelerator_start.elapsed().as_micros());
        let compact_payload = compact_grouped_components.payload;
        let compact_groups = compact_grouped_components.groups;
        if compact_groups.len() != eligible_indices.len() {
            return Err(JpegToHtj2kError::Validation(
                "9/7 grouped i16 compact preencoded accelerator returned wrong group count",
            ));
        }
        // Results come back in the same order as `eligible_indices`.
        for (&group_index, components) in eligible_indices.iter().zip(compact_groups) {
            let group = &groups[group_index];
            if components.len() != group.len() {
                return Err(JpegToHtj2kError::Validation(
                    "9/7 grouped i16 compact preencoded accelerator returned wrong component count",
                ));
            }

            timings.component_count = timings.component_count.saturating_add(group.len());
            record_batch_dispatch(timings, group.len());
            for (component_ref, component) in group.iter().copied().zip(components) {
                // Copies the component's bytes out of the shared payload into
                // the owning tile's payload buffer.
                store_compact_preencoded_component(
                    &mut tiles[component_ref.tile_index],
                    component_ref.component_index,
                    &compact_payload,
                    component,
                )?;
            }
            handled[group_index] = true;
        }
        return Ok(handled);
    }

    // Second choice: the non-compact grouped pre-encoded entry point.
    let grouped_components = accelerator
        .dct_grid_i16_to_htj2k97_preencoded_batch_groups(&batches, codeblock_options)
        .map_err(JpegToHtj2kError::Accelerator)?;
    if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
        add_dwt97_batch_stage_timings(timings, stage_timings);
    }
    timings.dct_to_wavelet_accelerator_us = timings
        .dct_to_wavelet_accelerator_us
        .saturating_add(accelerator_start.elapsed().as_micros());

    // `None` means the accelerator declined; no group is marked as handled.
    let Some(grouped_components) = grouped_components else {
        return Ok(handled);
    };
    if grouped_components.len() != eligible_indices.len() {
        return Err(JpegToHtj2kError::Validation(
            "9/7 grouped i16 preencoded accelerator returned wrong group count",
        ));
    }

    for (&group_index, components) in eligible_indices.iter().zip(grouped_components) {
        let group = &groups[group_index];
        if components.len() != group.len() {
            return Err(JpegToHtj2kError::Validation(
                "9/7 grouped i16 preencoded accelerator returned wrong component count",
            ));
        }

        timings.component_count = timings.component_count.saturating_add(group.len());
        record_batch_dispatch(timings, group.len());
        for (component_ref, component) in group.iter().copied().zip(components) {
            tiles[component_ref.tile_index].preencoded_components[component_ref.component_index] =
                Some(component);
        }
        handled[group_index] = true;
    }

    Ok(handled)
}
1575
/// Tries to have the accelerator pre-encode or pre-quantize one component
/// group and stores the results directly into the tiles.
///
/// Returns `Ok(true)` when the group's components were stored here, and
/// `Ok(false)` when the caller must fall back to the plain wavelet path.
///
/// The function declines immediately when the accelerator advertises neither
/// code-block batches nor i16 pre-encoded batches, when float-reference
/// validation is requested, or when any member's tile does not use exactly one
/// decomposition level.
///
/// Entry points are tried in this order, stopping at the first that yields a
/// result:
/// 1. i16 compact pre-encoded batch (only if advertised),
/// 2. i16 pre-encoded batch,
/// 3. f64 pre-encoded batch,
/// 4. f64 code-block (pre-quantized) batch.
///
/// Steps 1 and 2 are skipped when i16 pre-encoded batches are not advertised.
///
/// # Errors
///
/// Accelerator failures are wrapped as `JpegToHtj2kError::Accelerator`.
/// `JpegToHtj2kError::Validation` is returned when an entry point hands back
/// a component count different from `group.len()`, or when storing a compact
/// component fails. Job construction errors are propagated unchanged.
#[allow(clippy::too_many_lines)]
fn try_store_prequantized_float97_batch_group<A: DctToWaveletStageAccelerator>(
    group: &[BatchComponentRef],
    tiles: &mut [Float97BatchTile],
    options: &JpegToHtj2kOptions,
    accelerator: &mut A,
    timings: &mut TranscodeTimingReport,
) -> Result<bool, JpegToHtj2kError> {
    if !(accelerator.supports_htj2k97_codeblock_batch()
        || accelerator.supports_htj2k97_i16_preencoded_batch())
        || options.validate_against_float_reference
        || group
            .iter()
            .any(|component_ref| tiles[component_ref.tile_index].decomposition_levels != 1)
    {
        return Ok(false);
    }

    let codeblock_options = htj2k97_codeblock_options(&options.encode_options);
    // i16 route: jobs borrow the dequantized blocks directly, with no f64 repack.
    if accelerator.supports_htj2k97_i16_preencoded_batch() {
        let jobs = i16_htj2k97_jobs_for_batch_group(group, tiles)?;

        record_accelerator_attempt(timings, group.len());
        let accelerator_start = Instant::now();
        let compact_preencoded_components =
            if accelerator.supports_htj2k97_compact_preencoded_batch() {
                accelerator
                    .dct_grid_i16_to_htj2k97_compact_preencoded_batch(&jobs, codeblock_options)
                    .map_err(JpegToHtj2kError::Accelerator)?
            } else {
                None
            };
        // Stage timings are folded in here whether or not the compact call ran
        // or produced a result.
        if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
            add_dwt97_batch_stage_timings(timings, stage_timings);
        }
        if let Some(compact_batch) = compact_preencoded_components {
            timings.dct_to_wavelet_accelerator_us = timings
                .dct_to_wavelet_accelerator_us
                .saturating_add(accelerator_start.elapsed().as_micros());
            if compact_batch.components.len() != group.len() {
                return Err(JpegToHtj2kError::Validation(
                    "9/7 i16 compact preencoded accelerator returned wrong component count",
                ));
            }

            timings.component_count = timings.component_count.saturating_add(group.len());
            record_batch_dispatch(timings, group.len());
            for (component_ref, component) in group.iter().copied().zip(compact_batch.components) {
                // Copies the component's bytes out of the shared payload into
                // the owning tile's payload buffer.
                store_compact_preencoded_component(
                    &mut tiles[component_ref.tile_index],
                    component_ref.component_index,
                    &compact_batch.payload,
                    component,
                )?;
            }

            return Ok(true);
        }

        // Compact output unavailable: try the non-compact i16 entry point.
        let preencoded_components = accelerator
            .dct_grid_i16_to_htj2k97_preencoded_batch(&jobs, codeblock_options)
            .map_err(JpegToHtj2kError::Accelerator)?;
        if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
            add_dwt97_batch_stage_timings(timings, stage_timings);
        }
        timings.dct_to_wavelet_accelerator_us = timings
            .dct_to_wavelet_accelerator_us
            .saturating_add(accelerator_start.elapsed().as_micros());
        if let Some(components) = preencoded_components {
            if components.len() != group.len() {
                return Err(JpegToHtj2kError::Validation(
                    "9/7 i16 preencoded accelerator returned wrong component count",
                ));
            }

            timings.component_count = timings.component_count.saturating_add(group.len());
            record_batch_dispatch(timings, group.len());
            for (component_ref, component) in group.iter().copied().zip(components) {
                tiles[component_ref.tile_index].preencoded_components
                    [component_ref.component_index] = Some(component);
            }

            return Ok(true);
        }
        // Both i16 entry points declined: fall through to the f64 route.
    }

    // f64 route: repack each component's DCT blocks in parallel first.
    let repack_start = Instant::now();
    let block_storage = group
        .par_iter()
        .map(|component_ref| {
            dct_blocks_to_8x8_f64(
                &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index]
                    .dequantized_blocks,
            )
        })
        .collect::<Vec<_>>();
    timings.jpeg_dct_repack_us = timings
        .jpeg_dct_repack_us
        .saturating_add(repack_start.elapsed().as_micros());

    let jobs = group
        .iter()
        .zip(block_storage.iter())
        .map(|(component_ref, blocks)| {
            let tile = &tiles[component_ref.tile_index];
            let component = &tile.jpeg.components[component_ref.component_index];
            let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
            validate_component_block_grid(component)?;
            Ok(DctGridToHtj2k97CodeBlockJob {
                blocks,
                block_cols: component.block_cols as usize,
                block_rows: component.block_rows as usize,
                width: component.width as usize,
                height: component.height as usize,
                x_rsiz,
                y_rsiz,
            })
        })
        .collect::<Result<Vec<_>, JpegToHtj2kError>>()?;

    // This is recorded as a separate attempt from the i16 one above.
    record_accelerator_attempt(timings, group.len());
    let accelerator_start = Instant::now();
    let preencoded_components = accelerator
        .dct_grid_to_htj2k97_preencoded_batch(&jobs, codeblock_options)
        .map_err(JpegToHtj2kError::Accelerator)?;
    if let Some(components) = preencoded_components {
        if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
            add_dwt97_batch_stage_timings(timings, stage_timings);
        }
        timings.dct_to_wavelet_accelerator_us = timings
            .dct_to_wavelet_accelerator_us
            .saturating_add(accelerator_start.elapsed().as_micros());
        if components.len() != group.len() {
            return Err(JpegToHtj2kError::Validation(
                "9/7 preencoded accelerator returned wrong component count",
            ));
        }

        timings.component_count = timings.component_count.saturating_add(group.len());
        record_batch_dispatch(timings, group.len());
        for (component_ref, component) in group.iter().copied().zip(components) {
            tiles[component_ref.tile_index].preencoded_components[component_ref.component_index] =
                Some(component);
        }

        return Ok(true);
    }

    // Last resort: quantized code blocks, which still need encoding later.
    // `accelerator_start` is reused, so the elapsed time recorded below also
    // covers the declined pre-encoded call above.
    let accelerated_components = accelerator
        .dct_grid_to_htj2k97_codeblock_batch(&jobs, codeblock_options)
        .map_err(JpegToHtj2kError::Accelerator)?;
    if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
        add_dwt97_batch_stage_timings(timings, stage_timings);
    }
    timings.dct_to_wavelet_accelerator_us = timings
        .dct_to_wavelet_accelerator_us
        .saturating_add(accelerator_start.elapsed().as_micros());

    let Some(components) = accelerated_components else {
        return Ok(false);
    };
    if components.len() != group.len() {
        return Err(JpegToHtj2kError::Validation(
            "9/7 code-block accelerator returned wrong component count",
        ));
    }

    timings.component_count = timings.component_count.saturating_add(group.len());
    record_batch_dispatch(timings, group.len());
    for (component_ref, component) in group.iter().copied().zip(components) {
        tiles[component_ref.tile_index].prequantized_components[component_ref.component_index] =
            Some(component);
    }

    Ok(true)
}
1752
1753fn htj2k97_codeblock_options(options: &JpegToHtj2kEncodeOptions) -> Htj2k97CodeBlockOptions {
1754 Htj2k97CodeBlockOptions {
1755 bit_depth: 8,
1756 guard_bits: options.guard_bits.max(2),
1757 code_block_width_exp: options.code_block_width_exp,
1758 code_block_height_exp: options.code_block_height_exp,
1759 irreversible_quantization_scale: options.irreversible_quantization_scale,
1760 irreversible_quantization_subband_scales: options.irreversible_quantization_subband_scales,
1761 }
1762}
1763
1764fn native_progression_order(
1765 progression: J2kProgressionOrder,
1766) -> j2k_native::EncodeProgressionOrder {
1767 match progression {
1768 J2kProgressionOrder::Lrcp => j2k_native::EncodeProgressionOrder::Lrcp,
1769 J2kProgressionOrder::Rlcp => j2k_native::EncodeProgressionOrder::Rlcp,
1770 J2kProgressionOrder::Rpcl => j2k_native::EncodeProgressionOrder::Rpcl,
1771 J2kProgressionOrder::Pcrl => j2k_native::EncodeProgressionOrder::Pcrl,
1772 J2kProgressionOrder::Cprl => j2k_native::EncodeProgressionOrder::Cprl,
1773 }
1774}
1775
1776fn float97_wavelets_for_batch_group<A: DctToWaveletStageAccelerator>(
1777 group: &[BatchComponentRef],
1778 tiles: &[Float97BatchTile],
1779 scratch: &mut JpegToHtj2kScratch,
1780 accelerator: &mut A,
1781 timings: &mut TranscodeTimingReport,
1782) -> Result<Vec<ComponentWavelet97>, JpegToHtj2kError> {
1783 let repack_start = Instant::now();
1784 let block_storage = group
1785 .iter()
1786 .map(|component_ref| {
1787 dct_blocks_to_8x8_f64(
1788 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index]
1789 .dequantized_blocks,
1790 )
1791 })
1792 .collect::<Vec<_>>();
1793 timings.jpeg_dct_repack_us = timings
1794 .jpeg_dct_repack_us
1795 .saturating_add(repack_start.elapsed().as_micros());
1796
1797 let jobs = group
1798 .iter()
1799 .zip(block_storage.iter())
1800 .map(|(component_ref, blocks)| {
1801 let component =
1802 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index];
1803 validate_component_block_grid(component)?;
1804 Ok(DctGridToDwt97Job {
1805 blocks,
1806 block_cols: component.block_cols as usize,
1807 block_rows: component.block_rows as usize,
1808 width: component.width as usize,
1809 height: component.height as usize,
1810 })
1811 })
1812 .collect::<Result<Vec<_>, JpegToHtj2kError>>()?;
1813
1814 record_batch_attempt(timings, group.len());
1815 let accelerator_start = Instant::now();
1816 let accelerated_first_levels = accelerator
1817 .dct_grid_to_dwt97_batch(&jobs)
1818 .map_err(JpegToHtj2kError::Accelerator)?;
1819 if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
1820 add_dwt97_batch_stage_timings(timings, stage_timings);
1821 }
1822 timings.dct_to_wavelet_accelerator_us = timings
1823 .dct_to_wavelet_accelerator_us
1824 .saturating_add(accelerator_start.elapsed().as_micros());
1825
1826 if let Some(first_levels) = accelerated_first_levels {
1827 if first_levels.len() != group.len() {
1828 return Err(JpegToHtj2kError::Validation(
1829 "9/7 batch accelerator returned wrong component count",
1830 ));
1831 }
1832 timings.component_count = timings.component_count.saturating_add(group.len());
1833 record_accelerator_dispatch(timings, group.len());
1834 let decompose_start = Instant::now();
1835 let wavelets = first_levels
1836 .into_par_iter()
1837 .zip(group.par_iter().copied())
1838 .map(|(first_level, component_ref)| {
1839 decompose_97_from_first_level(
1840 first_level,
1841 usize::from(tiles[component_ref.tile_index].decomposition_levels),
1842 )
1843 })
1844 .collect::<Vec<_>>();
1845 timings.dwt_decompose_us = timings
1846 .dwt_decompose_us
1847 .saturating_add(decompose_start.elapsed().as_micros());
1848 return Ok(wavelets);
1849 }
1850
1851 group
1852 .iter()
1853 .map(|component_ref| {
1854 float_direct_97_wavelet_from_component(
1855 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index],
1856 tiles[component_ref.tile_index].decomposition_levels,
1857 scratch,
1858 accelerator,
1859 timings,
1860 )
1861 })
1862 .collect()
1863}
1864
1865fn add_dwt97_batch_stage_timings(
1866 timings: &mut TranscodeTimingReport,
1867 stage_timings: Dwt97BatchStageTimings,
1868) {
1869 timings.dwt97_batch_pack_upload_us = timings
1870 .dwt97_batch_pack_upload_us
1871 .saturating_add(stage_timings.pack_upload_us);
1872 timings.dwt97_batch_idct_row_lift_us = timings
1873 .dwt97_batch_idct_row_lift_us
1874 .saturating_add(stage_timings.idct_row_lift_us);
1875 timings.dwt97_batch_column_lift_us = timings
1876 .dwt97_batch_column_lift_us
1877 .saturating_add(stage_timings.column_lift_us);
1878 timings.dwt97_batch_quantize_codeblock_us = timings
1879 .dwt97_batch_quantize_codeblock_us
1880 .saturating_add(stage_timings.quantize_codeblock_us);
1881 timings.dwt97_batch_ht_encode_us = timings
1882 .dwt97_batch_ht_encode_us
1883 .saturating_add(stage_timings.ht_encode_us);
1884 timings.dwt97_batch_ht_kernel_us = timings
1885 .dwt97_batch_ht_kernel_us
1886 .saturating_add(stage_timings.ht_kernel_us);
1887 timings.dwt97_batch_ht_status_readback_us = timings
1888 .dwt97_batch_ht_status_readback_us
1889 .saturating_add(stage_timings.ht_status_readback_us);
1890 timings.dwt97_batch_ht_compact_us = timings
1891 .dwt97_batch_ht_compact_us
1892 .saturating_add(stage_timings.ht_compact_us);
1893 timings.dwt97_batch_ht_output_readback_us = timings
1894 .dwt97_batch_ht_output_readback_us
1895 .saturating_add(stage_timings.ht_output_readback_us);
1896 timings.dwt97_batch_ht_codeblock_dispatches = timings
1897 .dwt97_batch_ht_codeblock_dispatches
1898 .saturating_add(stage_timings.ht_codeblock_dispatches);
1899 timings.dwt97_batch_readback_us = timings
1900 .dwt97_batch_readback_us
1901 .saturating_add(stage_timings.readback_us);
1902}
1903
1904fn record_accelerator_attempt(timings: &mut TranscodeTimingReport, job_count: usize) {
1905 timings.accelerator_attempts = timings.accelerator_attempts.saturating_add(1);
1906 timings.accelerator_jobs = timings.accelerator_jobs.saturating_add(job_count);
1907}
1908
1909fn record_accelerator_dispatch(timings: &mut TranscodeTimingReport, job_count: usize) {
1910 timings.accelerator_dispatches = timings.accelerator_dispatches.saturating_add(1);
1911 timings.accelerator_dispatched_jobs = timings
1912 .accelerator_dispatched_jobs
1913 .saturating_add(job_count);
1914}
1915
1916fn record_batch_attempt(timings: &mut TranscodeTimingReport, job_count: usize) {
1917 timings.batch_count = timings.batch_count.saturating_add(1);
1918 timings.batch_jobs = timings.batch_jobs.saturating_add(job_count);
1919 record_accelerator_attempt(timings, job_count);
1920}
1921
1922fn record_batch_dispatch(timings: &mut TranscodeTimingReport, job_count: usize) {
1923 timings.batch_count = timings.batch_count.saturating_add(1);
1924 timings.batch_jobs = timings.batch_jobs.saturating_add(job_count);
1925 record_accelerator_dispatch(timings, job_count);
1926}
1927
1928fn record_cpu_fallback(timings: &mut TranscodeTimingReport, job_count: usize) {
1929 timings.cpu_fallback_jobs = timings.cpu_fallback_jobs.saturating_add(job_count);
1930}
1931
1932fn store_integer_batch_wavelet(
1933 component_ref: BatchComponentRef,
1934 wavelet: &IntegerWavelet,
1935 tiles: &mut [IntegerBatchTile],
1936 options: &JpegToHtj2kOptions,
1937 scratch: &mut JpegToHtj2kScratch,
1938) -> Result<(), JpegToHtj2kError> {
1939 let tile = &mut tiles[component_ref.tile_index];
1940 let component = &tile.jpeg.components[component_ref.component_index];
1941 let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
1942 let actual_coefficients = flatten_integer_wavelet(wavelet);
1943 tile.precomputed_components[component_ref.component_index] =
1944 Some(PrecomputedHtj2k53Component {
1945 x_rsiz,
1946 y_rsiz,
1947 dwt: j2k_dwt_from_integer_wavelet(wavelet),
1948 });
1949
1950 if options.validate_against_float_reference {
1951 tile.float_validation_actual
1952 .extend(actual_coefficients.clone());
1953 tile.float_validation_expected
1954 .extend(float_reference_coefficients(
1955 component,
1956 tile.decomposition_levels,
1957 scratch,
1958 )?);
1959 }
1960 if options.validate_against_integer_reference {
1961 tile.integer_validation_actual.extend(actual_coefficients);
1962 tile.integer_validation_expected
1963 .extend(integer_reference_coefficients(
1964 component,
1965 tile.decomposition_levels,
1966 )?);
1967 }
1968
1969 Ok(())
1970}
1971
1972fn store_float97_batch_wavelet(
1973 component_ref: BatchComponentRef,
1974 wavelet: &ComponentWavelet97,
1975 tiles: &mut [Float97BatchTile],
1976 options: &JpegToHtj2kOptions,
1977 scratch: &mut JpegToHtj2kScratch,
1978) -> Result<(), JpegToHtj2kError> {
1979 let tile = &mut tiles[component_ref.tile_index];
1980 let component = &tile.jpeg.components[component_ref.component_index];
1981 let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
1982 tile.precomputed_components[component_ref.component_index] =
1983 Some(PrecomputedHtj2k97Component {
1984 x_rsiz,
1985 y_rsiz,
1986 dwt: j2k_dwt97_from_wavelet(
1987 wavelet,
1988 component.width as usize,
1989 component.height as usize,
1990 ),
1991 });
1992
1993 if options.validate_against_float_reference {
1994 let actual_coefficients = rounded_wavelet97_i32(wavelet)?;
1995 tile.float_validation_actual.extend(actual_coefficients);
1996 tile.float_validation_expected
1997 .extend(float97_reference_coefficients(
1998 component,
1999 tile.decomposition_levels,
2000 scratch,
2001 )?);
2002 }
2003
2004 Ok(())
2005}
2006
2007fn record_encode_dispatch_delta(
2008 timings: &mut TranscodeTimingReport,
2009 before: J2kEncodeDispatchReport,
2010 after: J2kEncodeDispatchReport,
2011) {
2012 let delta = after.saturating_delta(before);
2013 timings.htj2k_encode_accelerator_dispatches = timings
2014 .htj2k_encode_accelerator_dispatches
2015 .saturating_add(delta.total());
2016 timings.htj2k_encode_ht_code_block_dispatches = timings
2017 .htj2k_encode_ht_code_block_dispatches
2018 .saturating_add(delta.ht_code_block);
2019 timings.htj2k_encode_packetization_dispatches = timings
2020 .htj2k_encode_packetization_dispatches
2021 .saturating_add(delta.packetization);
2022}
2023
2024fn add_encode_timing_counters_from_result(
2025 timings: &mut TranscodeTimingReport,
2026 tile: &Result<EncodedTranscode, JpegToHtj2kError>,
2027) {
2028 let Ok(tile) = tile else {
2029 return;
2030 };
2031 timings.htj2k_encode_accelerator_dispatches = timings
2032 .htj2k_encode_accelerator_dispatches
2033 .saturating_add(tile.report.timings.htj2k_encode_accelerator_dispatches);
2034 timings.htj2k_encode_ht_code_block_dispatches = timings
2035 .htj2k_encode_ht_code_block_dispatches
2036 .saturating_add(tile.report.timings.htj2k_encode_ht_code_block_dispatches);
2037 timings.htj2k_encode_packetization_dispatches = timings
2038 .htj2k_encode_packetization_dispatches
2039 .saturating_add(tile.report.timings.htj2k_encode_packetization_dispatches);
2040}
2041
2042fn encode_integer_prepared_tiles<E: J2kEncodeStageAccelerator>(
2043 prepared_tiles: Vec<IntegerBatchTile>,
2044 options: &JpegToHtj2kOptions,
2045 encode_accelerator: &mut E,
2046) -> Vec<(usize, Result<EncodedTranscode, JpegToHtj2kError>)> {
2047 if encode_accelerator.prefer_parallel_cpu_tile_encode() {
2048 return prepared_tiles
2049 .into_par_iter()
2050 .map(|prepared| {
2051 let tile_index = prepared.tile_index;
2052 let mut cpu_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
2053 (
2054 tile_index,
2055 encode_integer_batch_tile(prepared, options, &mut cpu_accelerator),
2056 )
2057 })
2058 .collect();
2059 }
2060
2061 prepared_tiles
2062 .into_iter()
2063 .map(|prepared| {
2064 let tile_index = prepared.tile_index;
2065 (
2066 tile_index,
2067 encode_integer_batch_tile(prepared, options, encode_accelerator),
2068 )
2069 })
2070 .collect()
2071}
2072
2073fn encode_float97_prepared_tiles<E: J2kEncodeStageAccelerator>(
2074 prepared_tiles: Vec<Float97BatchTile>,
2075 options: &JpegToHtj2kOptions,
2076 encode_accelerator: &mut E,
2077) -> Vec<(usize, Result<EncodedTranscode, JpegToHtj2kError>)> {
2078 if !encode_accelerator.prefer_parallel_cpu_tile_encode()
2079 && can_encode_float97_precomputed_tiles_batch(&prepared_tiles, options)
2080 {
2081 return encode_float97_precomputed_tiles_batch(prepared_tiles, options, encode_accelerator);
2082 }
2083
2084 if encode_accelerator.prefer_parallel_cpu_tile_encode() {
2085 return prepared_tiles
2086 .into_par_iter()
2087 .map(|prepared| {
2088 let tile_index = prepared.tile_index;
2089 let mut cpu_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
2090 (
2091 tile_index,
2092 encode_float97_batch_tile(prepared, options, &mut cpu_accelerator),
2093 )
2094 })
2095 .collect();
2096 }
2097
2098 prepared_tiles
2099 .into_iter()
2100 .map(|prepared| {
2101 let tile_index = prepared.tile_index;
2102 (
2103 tile_index,
2104 encode_float97_batch_tile(prepared, options, encode_accelerator),
2105 )
2106 })
2107 .collect()
2108}
2109
2110fn can_encode_float97_precomputed_tiles_batch(
2111 prepared_tiles: &[Float97BatchTile],
2112 options: &JpegToHtj2kOptions,
2113) -> bool {
2114 options.encode_options.num_layers == 1
2115 && prepared_tiles.iter().all(|tile| {
2116 tile.precomputed_components.iter().all(Option::is_some)
2117 && tile.preencoded_compact_payload.is_empty()
2118 && tile
2119 .preencoded_compact_components
2120 .iter()
2121 .all(Option::is_none)
2122 && tile.preencoded_components.iter().all(Option::is_none)
2123 && tile.prequantized_components.iter().all(Option::is_none)
2124 })
2125}
2126
2127#[allow(clippy::too_many_lines)]
2128fn encode_float97_precomputed_tiles_batch<E: J2kEncodeStageAccelerator>(
2129 prepared_tiles: Vec<Float97BatchTile>,
2130 options: &JpegToHtj2kOptions,
2131 encode_accelerator: &mut E,
2132) -> Vec<(usize, Result<EncodedTranscode, JpegToHtj2kError>)> {
2133 let mut records = Vec::with_capacity(prepared_tiles.len());
2134 let mut images = Vec::with_capacity(prepared_tiles.len());
2135
2136 for tile in prepared_tiles {
2137 let Float97BatchTile {
2138 tile_index,
2139 jpeg,
2140 decomposition_levels,
2141 all_unit_sampled,
2142 component_reports,
2143 precomputed_components,
2144 preencoded_compact_payload: _,
2145 preencoded_compact_components: _,
2146 preencoded_components: _,
2147 prequantized_components: _,
2148 float_validation_actual,
2149 float_validation_expected,
2150 timings,
2151 ..
2152 } = tile;
2153 let components = match precomputed_components
2154 .into_iter()
2155 .map(|component| {
2156 component.ok_or(JpegToHtj2kError::Validation(
2157 "9/7 precomputed batch transcode did not produce all components",
2158 ))
2159 })
2160 .collect::<Result<Vec<_>, _>>()
2161 {
2162 Ok(components) => components,
2163 Err(error) => return vec![(tile_index, Err(error))],
2164 };
2165 images.push(PrecomputedHtj2k97Image {
2166 width: jpeg.width,
2167 height: jpeg.height,
2168 bit_depth: 8,
2169 signed: false,
2170 components,
2171 });
2172 records.push(Float97PrecomputedBatchRecord {
2173 tile_index,
2174 jpeg,
2175 decomposition_levels,
2176 all_unit_sampled,
2177 component_reports,
2178 float_validation_actual,
2179 float_validation_expected,
2180 timings,
2181 });
2182 }
2183
2184 let encode_start = Instant::now();
2185 let encode_dispatch_before = encode_accelerator.dispatch_report();
2186 let native_images = images;
2187 let codestreams = {
2188 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2189 let native_encode_options = options.encode_options.to_native();
2190 match encode_precomputed_htj2k_97_batch_with_accelerator(
2191 &native_images,
2192 &native_encode_options,
2193 &mut native_encode_accelerator,
2194 ) {
2195 Ok(codestreams) => codestreams,
2196 Err(error) => {
2197 return records
2198 .into_iter()
2199 .map(|record| (record.tile_index, Err(JpegToHtj2kError::Encode(error))))
2200 .collect();
2201 }
2202 }
2203 };
2204 let encode_dispatch_after = encode_accelerator.dispatch_report();
2205 let encode_us = encode_start.elapsed().as_micros();
2206
2207 if codestreams.len() != records.len() {
2208 return records
2209 .into_iter()
2210 .map(|record| {
2211 (
2212 record.tile_index,
2213 Err(JpegToHtj2kError::Validation(
2214 "9/7 precomputed batch encode returned the wrong tile count",
2215 )),
2216 )
2217 })
2218 .collect();
2219 }
2220
2221 records
2222 .into_iter()
2223 .zip(codestreams)
2224 .enumerate()
2225 .map(|(batch_index, (record, codestream))| {
2226 let encode_measurement = (batch_index == 0).then_some((
2227 encode_dispatch_before,
2228 encode_dispatch_after,
2229 encode_us,
2230 ));
2231 (
2232 record.tile_index,
2233 encoded_float97_precomputed_batch_record(
2234 record,
2235 codestream,
2236 options,
2237 encode_measurement,
2238 ),
2239 )
2240 })
2241 .collect()
2242}
2243
2244fn encoded_float97_precomputed_batch_record(
2245 record: Float97PrecomputedBatchRecord,
2246 codestream: Vec<u8>,
2247 options: &JpegToHtj2kOptions,
2248 encode_measurement: Option<(J2kEncodeDispatchReport, J2kEncodeDispatchReport, u128)>,
2249) -> Result<EncodedTranscode, JpegToHtj2kError> {
2250 let Float97PrecomputedBatchRecord {
2251 jpeg,
2252 decomposition_levels,
2253 all_unit_sampled,
2254 component_reports,
2255 float_validation_actual,
2256 float_validation_expected,
2257 mut timings,
2258 ..
2259 } = record;
2260
2261 if let Some((encode_dispatch_before, encode_dispatch_after, encode_us)) = encode_measurement {
2262 record_encode_dispatch_delta(&mut timings, encode_dispatch_before, encode_dispatch_after);
2263 timings.htj2k_encode_us = encode_us;
2264 }
2265 let encode_us = timings.htj2k_encode_us;
2266 let float_reference_metrics = if options.validate_against_float_reference {
2267 Some(error_metrics_i32(
2268 &float_validation_actual,
2269 &float_validation_expected,
2270 )?)
2271 } else {
2272 None
2273 };
2274
2275 Ok(EncodedTranscode {
2276 codestream,
2277 report: TranscodeReport {
2278 width: jpeg.width,
2279 height: jpeg.height,
2280 component_count: jpeg.components.len(),
2281 components: component_reports,
2282 float_reference_classification: float_reference_metrics
2283 .as_ref()
2284 .map(TranscodeValidationClassification::classify_metrics),
2285 float_reference_metrics,
2286 integer_reference_classification: None,
2287 integer_reference_metrics: None,
2288 decomposition_levels,
2289 coefficient_path: options.coefficient_path,
2290 path: transcode_path_name(all_unit_sampled, options.coefficient_path),
2291 extract_us: timings.jpeg_dct_extract_us,
2292 transform_us: 0,
2293 encode_us,
2294 timings,
2295 },
2296 })
2297}
2298
2299fn encode_integer_batch_tile<E: J2kEncodeStageAccelerator>(
2300 tile: IntegerBatchTile,
2301 options: &JpegToHtj2kOptions,
2302 encode_accelerator: &mut E,
2303) -> Result<EncodedTranscode, JpegToHtj2kError> {
2304 let mut timings = tile.timings;
2305 let components = tile
2306 .precomputed_components
2307 .into_iter()
2308 .map(|component| {
2309 component.ok_or(JpegToHtj2kError::Validation(
2310 "integer batch transcode did not produce all components",
2311 ))
2312 })
2313 .collect::<Result<Vec<_>, _>>()?;
2314 let encode_start = Instant::now();
2315 let precomputed = PrecomputedHtj2k53Image {
2316 width: tile.jpeg.width,
2317 height: tile.jpeg.height,
2318 bit_depth: 8,
2319 signed: false,
2320 components,
2321 };
2322 let encode_dispatch_before = encode_accelerator.dispatch_report();
2323 let native_precomputed = precomputed;
2324 let codestream = {
2325 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2326 let native_encode_options = options.encode_options.to_native();
2327 encode_precomputed_htj2k_53_with_accelerator(
2328 &native_precomputed,
2329 &native_encode_options,
2330 &mut native_encode_accelerator,
2331 )
2332 .map_err(JpegToHtj2kError::Encode)?
2333 };
2334 record_encode_dispatch_delta(
2335 &mut timings,
2336 encode_dispatch_before,
2337 encode_accelerator.dispatch_report(),
2338 );
2339 let encode_us = encode_start.elapsed().as_micros();
2340 timings.htj2k_encode_us = encode_us;
2341 let integer_reference_metrics = if options.validate_against_integer_reference {
2342 Some(error_metrics_i32(
2343 &tile.integer_validation_actual,
2344 &tile.integer_validation_expected,
2345 )?)
2346 } else {
2347 None
2348 };
2349 let float_reference_metrics = if options.validate_against_float_reference {
2350 Some(error_metrics_i32(
2351 &tile.float_validation_actual,
2352 &tile.float_validation_expected,
2353 )?)
2354 } else {
2355 None
2356 };
2357
2358 Ok(EncodedTranscode {
2359 codestream,
2360 report: TranscodeReport {
2361 width: tile.jpeg.width,
2362 height: tile.jpeg.height,
2363 component_count: tile.jpeg.components.len(),
2364 components: tile.component_reports,
2365 float_reference_classification: float_reference_metrics
2366 .as_ref()
2367 .map(TranscodeValidationClassification::classify_metrics),
2368 float_reference_metrics,
2369 integer_reference_classification: integer_reference_metrics
2370 .as_ref()
2371 .map(TranscodeValidationClassification::classify_metrics),
2372 integer_reference_metrics,
2373 decomposition_levels: tile.decomposition_levels,
2374 coefficient_path: options.coefficient_path,
2375 path: transcode_path_name(tile.all_unit_sampled, options.coefficient_path),
2376 extract_us: timings.jpeg_dct_extract_us,
2377 transform_us: 0,
2378 encode_us,
2379 timings,
2380 },
2381 })
2382}
2383
2384#[allow(clippy::too_many_lines)]
2385fn encode_float97_batch_tile<E: J2kEncodeStageAccelerator>(
2386 tile: Float97BatchTile,
2387 options: &JpegToHtj2kOptions,
2388 encode_accelerator: &mut E,
2389) -> Result<EncodedTranscode, JpegToHtj2kError> {
2390 let Float97BatchTile {
2391 jpeg,
2392 decomposition_levels,
2393 all_unit_sampled,
2394 component_reports,
2395 precomputed_components,
2396 preencoded_compact_payload,
2397 preencoded_compact_components,
2398 preencoded_components,
2399 prequantized_components,
2400 float_validation_actual,
2401 float_validation_expected,
2402 mut timings,
2403 ..
2404 } = tile;
2405
2406 let encode_start = Instant::now();
2407 let encode_dispatch_before = encode_accelerator.dispatch_report();
2408 let codestream = {
2409 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2410 let native_encode_options = options.encode_options.to_native();
2411 if preencoded_compact_components.iter().any(Option::is_some) {
2412 let components = preencoded_compact_components
2413 .into_iter()
2414 .map(|component| {
2415 component.ok_or(JpegToHtj2kError::Validation(
2416 "9/7 compact preencoded batch transcode did not produce all components",
2417 ))
2418 })
2419 .collect::<Result<Vec<_>, _>>()?;
2420 let preencoded = PreencodedHtj2k97CompactImage {
2421 width: jpeg.width,
2422 height: jpeg.height,
2423 bit_depth: 8,
2424 signed: false,
2425 payload: preencoded_compact_payload,
2426 components,
2427 };
2428 encode_preencoded_htj2k_97_compact_owned_with_accelerator(
2429 preencoded,
2430 &native_encode_options,
2431 &mut native_encode_accelerator,
2432 )
2433 .map_err(JpegToHtj2kError::Encode)?
2434 } else if preencoded_components.iter().any(Option::is_some) {
2435 let components = preencoded_components
2436 .into_iter()
2437 .map(|component| {
2438 component.ok_or(JpegToHtj2kError::Validation(
2439 "9/7 preencoded batch transcode did not produce all components",
2440 ))
2441 })
2442 .collect::<Result<Vec<_>, _>>()?;
2443 let preencoded = PreencodedHtj2k97Image {
2444 width: jpeg.width,
2445 height: jpeg.height,
2446 bit_depth: 8,
2447 signed: false,
2448 components,
2449 };
2450 encode_preencoded_htj2k_97_owned_with_accelerator(
2451 preencoded,
2452 &native_encode_options,
2453 &mut native_encode_accelerator,
2454 )
2455 .map_err(JpegToHtj2kError::Encode)?
2456 } else if prequantized_components.iter().any(Option::is_some) {
2457 let components = prequantized_components
2458 .into_iter()
2459 .map(|component| {
2460 component.ok_or(JpegToHtj2kError::Validation(
2461 "9/7 code-block batch transcode did not produce all components",
2462 ))
2463 })
2464 .collect::<Result<Vec<_>, _>>()?;
2465 let prequantized = PrequantizedHtj2k97Image {
2466 width: jpeg.width,
2467 height: jpeg.height,
2468 bit_depth: 8,
2469 signed: false,
2470 components,
2471 };
2472 let native_prequantized = prequantized;
2473 encode_prequantized_htj2k_97_with_accelerator(
2474 &native_prequantized,
2475 &native_encode_options,
2476 &mut native_encode_accelerator,
2477 )
2478 .map_err(JpegToHtj2kError::Encode)?
2479 } else {
2480 let components = precomputed_components
2481 .into_iter()
2482 .map(|component| {
2483 component.ok_or(JpegToHtj2kError::Validation(
2484 "9/7 batch transcode did not produce all components",
2485 ))
2486 })
2487 .collect::<Result<Vec<_>, _>>()?;
2488 let precomputed = PrecomputedHtj2k97Image {
2489 width: jpeg.width,
2490 height: jpeg.height,
2491 bit_depth: 8,
2492 signed: false,
2493 components,
2494 };
2495 let native_precomputed = precomputed;
2496 encode_precomputed_htj2k_97_with_accelerator(
2497 &native_precomputed,
2498 &native_encode_options,
2499 &mut native_encode_accelerator,
2500 )
2501 .map_err(JpegToHtj2kError::Encode)?
2502 }
2503 };
2504 record_encode_dispatch_delta(
2505 &mut timings,
2506 encode_dispatch_before,
2507 encode_accelerator.dispatch_report(),
2508 );
2509 let encode_us = encode_start.elapsed().as_micros();
2510 timings.htj2k_encode_us = encode_us;
2511 let float_reference_metrics = if options.validate_against_float_reference {
2512 Some(error_metrics_i32(
2513 &float_validation_actual,
2514 &float_validation_expected,
2515 )?)
2516 } else {
2517 None
2518 };
2519
2520 Ok(EncodedTranscode {
2521 codestream,
2522 report: TranscodeReport {
2523 width: jpeg.width,
2524 height: jpeg.height,
2525 component_count: jpeg.components.len(),
2526 components: component_reports,
2527 float_reference_classification: float_reference_metrics
2528 .as_ref()
2529 .map(TranscodeValidationClassification::classify_metrics),
2530 float_reference_metrics,
2531 integer_reference_classification: None,
2532 integer_reference_metrics: None,
2533 decomposition_levels,
2534 coefficient_path: options.coefficient_path,
2535 path: transcode_path_name(all_unit_sampled, options.coefficient_path),
2536 extract_us: timings.jpeg_dct_extract_us,
2537 transform_us: 0,
2538 encode_us,
2539 timings,
2540 },
2541 })
2542}
2543
2544#[allow(clippy::too_many_lines)]
2545fn jpeg_to_htj2k_with_scratch<A: DctToWaveletStageAccelerator, E: J2kEncodeStageAccelerator>(
2546 bytes: &[u8],
2547 options: &JpegToHtj2kOptions,
2548 scratch: &mut JpegToHtj2kScratch,
2549 accelerator: &mut A,
2550 encode_accelerator: &mut E,
2551) -> Result<EncodedTranscode, JpegToHtj2kError> {
2552 validate_transcode_options(options)?;
2553 let mut timings = TranscodeTimingReport {
2554 tile_count: 1,
2555 ..TranscodeTimingReport::default()
2556 };
2557
2558 let extract_start = Instant::now();
2559 let jpeg = extract_dct_blocks(bytes, DctExtractOptions::default())?;
2560 let extract_us = extract_start.elapsed().as_micros();
2561 timings.jpeg_dct_extract_us = extract_us;
2562
2563 if jpeg.components.is_empty() || jpeg.components.len() > 4 {
2564 return Err(JpegToHtj2kError::Unsupported(
2565 "unsupported JPEG component count for jpeg_to_htj2k",
2566 ));
2567 }
2568 let component_sampling =
2569 component_sampling_for_jpeg(&jpeg.components, jpeg.width, jpeg.height)?;
2570 let decomposition_levels = decomposition_levels_for_components(
2571 &jpeg.components,
2572 options.encode_options.num_decomposition_levels,
2573 )?;
2574 let all_unit_sampled = component_sampling
2575 .iter()
2576 .all(|&(x_rsiz, y_rsiz)| x_rsiz == 1 && y_rsiz == 1);
2577 let component_reports = jpeg
2578 .components
2579 .iter()
2580 .zip(component_sampling.iter().copied())
2581 .map(|(component, (x_rsiz, y_rsiz))| TranscodeComponentReport {
2582 component_index: component.component_index,
2583 width: component.width,
2584 height: component.height,
2585 block_cols: component.block_cols,
2586 block_rows: component.block_rows,
2587 x_rsiz,
2588 y_rsiz,
2589 })
2590 .collect();
2591
2592 let transform_start = Instant::now();
2593 let component_batch = transcode_component_batch(
2594 &jpeg.components,
2595 &component_sampling,
2596 decomposition_levels,
2597 options,
2598 scratch,
2599 accelerator,
2600 &mut timings,
2601 )?;
2602 let transform_us = transform_start.elapsed().as_micros();
2603 timings.dct_to_wavelet_total_us = transform_us;
2604
2605 let encode_start = Instant::now();
2606 let encode_dispatch_before = encode_accelerator.dispatch_report();
2607 let native_encode_options = options.encode_options.to_native();
2608 let codestream = match component_batch.precomputed_components {
2609 PrecomputedComponentBatch::Dwt53(components) => {
2610 let precomputed = PrecomputedHtj2k53Image {
2611 width: jpeg.width,
2612 height: jpeg.height,
2613 bit_depth: 8,
2614 signed: false,
2615 components,
2616 };
2617 let native_precomputed = precomputed;
2618 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2619 encode_precomputed_htj2k_53_with_accelerator(
2620 &native_precomputed,
2621 &native_encode_options,
2622 &mut native_encode_accelerator,
2623 )
2624 .map_err(JpegToHtj2kError::Encode)?
2625 }
2626 PrecomputedComponentBatch::Dwt97(components) => {
2627 let precomputed = PrecomputedHtj2k97Image {
2628 width: jpeg.width,
2629 height: jpeg.height,
2630 bit_depth: 8,
2631 signed: false,
2632 components,
2633 };
2634 let native_precomputed = precomputed;
2635 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2636 encode_precomputed_htj2k_97_with_accelerator(
2637 &native_precomputed,
2638 &native_encode_options,
2639 &mut native_encode_accelerator,
2640 )
2641 .map_err(JpegToHtj2kError::Encode)?
2642 }
2643 };
2644 record_encode_dispatch_delta(
2645 &mut timings,
2646 encode_dispatch_before,
2647 encode_accelerator.dispatch_report(),
2648 );
2649 let encode_us = encode_start.elapsed().as_micros();
2650 timings.htj2k_encode_us = encode_us;
2651
2652 Ok(EncodedTranscode {
2653 codestream,
2654 report: TranscodeReport {
2655 width: jpeg.width,
2656 height: jpeg.height,
2657 component_count: jpeg.components.len(),
2658 components: component_reports,
2659 float_reference_classification: component_batch
2660 .float_reference_metrics
2661 .as_ref()
2662 .map(TranscodeValidationClassification::classify_metrics),
2663 float_reference_metrics: component_batch.float_reference_metrics,
2664 integer_reference_classification: component_batch
2665 .integer_reference_metrics
2666 .as_ref()
2667 .map(TranscodeValidationClassification::classify_metrics),
2668 integer_reference_metrics: component_batch.integer_reference_metrics,
2669 decomposition_levels,
2670 coefficient_path: options.coefficient_path,
2671 path: transcode_path_name(all_unit_sampled, options.coefficient_path),
2672 extract_us,
2673 transform_us,
2674 encode_us,
2675 timings,
2676 },
2677 })
2678}
2679
2680fn validate_transcode_options(options: &JpegToHtj2kOptions) -> Result<(), JpegToHtj2kError> {
2681 if !options.encode_options.use_ht_block_coding {
2682 return Err(JpegToHtj2kError::Unsupported(
2683 "jpeg_to_htj2k requires HT block coding",
2684 ));
2685 }
2686 if options.encode_options.use_mct {
2687 return Err(JpegToHtj2kError::Unsupported(
2688 "jpeg_to_htj2k requires use_mct=false because JPEG components stay in native color space",
2689 ));
2690 }
2691
2692 match (options.coefficient_path, options.encode_options.reversible) {
2693 (
2694 JpegToHtj2kCoefficientPath::IntegerDirect53
2695 | JpegToHtj2kCoefficientPath::FloatDirectLinear53,
2696 true,
2697 )
2698 | (JpegToHtj2kCoefficientPath::FloatDirectLinear97, false) => Ok(()),
2699 (
2700 JpegToHtj2kCoefficientPath::IntegerDirect53
2701 | JpegToHtj2kCoefficientPath::FloatDirectLinear53,
2702 false,
2703 ) => Err(JpegToHtj2kError::Unsupported(
2704 "5/3 coefficient path requires reversible HTJ2K encode",
2705 )),
2706 (JpegToHtj2kCoefficientPath::FloatDirectLinear97, true) => {
2707 Err(JpegToHtj2kError::Unsupported(
2708 "9/7 coefficient path requires irreversible HTJ2K encode",
2709 ))
2710 }
2711 }
2712}
2713
2714struct ComponentTranscodeBatch {
2715 precomputed_components: PrecomputedComponentBatch,
2716 float_reference_metrics: Option<TranscodeValidationMetrics>,
2717 integer_reference_metrics: Option<TranscodeValidationMetrics>,
2718}
2719
2720enum PrecomputedComponentBatch {
2721 Dwt53(Vec<PrecomputedHtj2k53Component>),
2722 Dwt97(Vec<PrecomputedHtj2k97Component>),
2723}
2724
2725struct ComponentTranscodeResult {
2726 precomputed: PrecomputedComponent,
2727 float_validation_coefficients: Option<(Vec<i32>, Vec<i32>)>,
2728 integer_validation_coefficients: Option<(Vec<i32>, Vec<i32>)>,
2729}
2730
2731enum PrecomputedComponent {
2732 Dwt53(PrecomputedHtj2k53Component),
2733 Dwt97(PrecomputedHtj2k97Component),
2734}
2735
2736struct ComponentWavelet {
2737 final_ll: Vec<f64>,
2738 final_ll_width: usize,
2739 final_ll_height: usize,
2740 levels: Vec<Dwt53TwoDimensional<f64>>,
2741}
2742
2743struct ComponentWavelet97 {
2744 final_ll: Vec<f64>,
2745 final_ll_width: usize,
2746 final_ll_height: usize,
2747 levels: Vec<Dwt97TwoDimensional<f64>>,
2748}
2749
/// One level of an integer wavelet decomposition: its dimensions and detail subbands.
struct IntegerWaveletLevel {
    /// Width of the level.
    width: usize,
    /// Height of the level.
    height: usize,
    /// Low-band width (presumably the horizontal low-pass extent; confirm at the producer).
    low_width: usize,
    /// Low-band height (presumably the vertical low-pass extent; confirm at the producer).
    low_height: usize,
    /// High-band width (presumably the horizontal high-pass extent; confirm at the producer).
    high_width: usize,
    /// High-band height (presumably the vertical high-pass extent; confirm at the producer).
    high_height: usize,
    /// HL subband coefficients.
    hl: Vec<i32>,
    /// LH subband coefficients.
    lh: Vec<i32>,
    /// HH subband coefficients.
    hh: Vec<i32>,
}
2761
2762struct IntegerWavelet {
2763 final_ll: Vec<i32>,
2764 final_ll_width: usize,
2765 final_ll_height: usize,
2766 levels: Vec<IntegerWaveletLevel>,
2767}
2768
2769fn transcode_component_batch(
2770 components: &[JpegDctComponent],
2771 component_sampling: &[(u8, u8)],
2772 decomposition_levels: u8,
2773 options: &JpegToHtj2kOptions,
2774 scratch: &mut JpegToHtj2kScratch,
2775 accelerator: &mut impl DctToWaveletStageAccelerator,
2776 timings: &mut TranscodeTimingReport,
2777) -> Result<ComponentTranscodeBatch, JpegToHtj2kError> {
2778 if matches!(
2779 options.coefficient_path,
2780 JpegToHtj2kCoefficientPath::FloatDirectLinear97
2781 ) && options.validate_against_integer_reference
2782 {
2783 return Err(JpegToHtj2kError::Unsupported(
2784 "integer reversible validation is only defined for 5/3 coefficient paths",
2785 ));
2786 }
2787
2788 if matches!(
2789 options.coefficient_path,
2790 JpegToHtj2kCoefficientPath::IntegerDirect53
2791 ) {
2792 return transcode_integer_component_batch(
2793 components,
2794 component_sampling,
2795 decomposition_levels,
2796 options,
2797 scratch,
2798 accelerator,
2799 timings,
2800 );
2801 }
2802
2803 let mut precomputed_53 = Vec::with_capacity(components.len());
2804 let mut precomputed_97 = Vec::with_capacity(components.len());
2805 let mut float_validation_actual = Vec::new();
2806 let mut float_validation_expected = Vec::new();
2807 let mut integer_validation_actual = Vec::new();
2808 let mut integer_validation_expected = Vec::new();
2809
2810 for (component, (x_rsiz, y_rsiz)) in components.iter().zip(component_sampling.iter().copied()) {
2811 let component_result = component_to_precomputed_htj2k(
2812 component,
2813 x_rsiz,
2814 y_rsiz,
2815 decomposition_levels,
2816 options,
2817 scratch,
2818 accelerator,
2819 timings,
2820 )?;
2821 match component_result.precomputed {
2822 PrecomputedComponent::Dwt53(precomputed) => precomputed_53.push(precomputed),
2823 PrecomputedComponent::Dwt97(precomputed) => precomputed_97.push(precomputed),
2824 }
2825 if let Some((actual, expected)) = component_result.float_validation_coefficients {
2826 float_validation_actual.extend(actual);
2827 float_validation_expected.extend(expected);
2828 }
2829 if let Some((actual, expected)) = component_result.integer_validation_coefficients {
2830 integer_validation_actual.extend(actual);
2831 integer_validation_expected.extend(expected);
2832 }
2833 }
2834
2835 let float_reference_metrics = if options.validate_against_float_reference {
2836 Some(error_metrics_i32(
2837 &float_validation_actual,
2838 &float_validation_expected,
2839 )?)
2840 } else {
2841 None
2842 };
2843 let integer_reference_metrics = if options.validate_against_integer_reference {
2844 Some(error_metrics_i32(
2845 &integer_validation_actual,
2846 &integer_validation_expected,
2847 )?)
2848 } else {
2849 None
2850 };
2851
2852 let precomputed_components = if matches!(
2853 options.coefficient_path,
2854 JpegToHtj2kCoefficientPath::FloatDirectLinear97
2855 ) {
2856 PrecomputedComponentBatch::Dwt97(precomputed_97)
2857 } else {
2858 PrecomputedComponentBatch::Dwt53(precomputed_53)
2859 };
2860
2861 Ok(ComponentTranscodeBatch {
2862 precomputed_components,
2863 float_reference_metrics,
2864 integer_reference_metrics,
2865 })
2866}
2867
2868fn transcode_integer_component_batch(
2869 components: &[JpegDctComponent],
2870 component_sampling: &[(u8, u8)],
2871 decomposition_levels: u8,
2872 options: &JpegToHtj2kOptions,
2873 scratch: &mut JpegToHtj2kScratch,
2874 accelerator: &mut impl DctToWaveletStageAccelerator,
2875 timings: &mut TranscodeTimingReport,
2876) -> Result<ComponentTranscodeBatch, JpegToHtj2kError> {
2877 let mut precomputed_53: Vec<Option<PrecomputedHtj2k53Component>> =
2878 (0..components.len()).map(|_| None).collect();
2879 let mut float_validation_actual = Vec::new();
2880 let mut float_validation_expected = Vec::new();
2881 let mut integer_validation_actual = Vec::new();
2882 let mut integer_validation_expected = Vec::new();
2883
2884 for group in same_geometry_component_groups(components) {
2885 let group_wavelets = integer_wavelets_for_component_group(
2886 &group,
2887 components,
2888 decomposition_levels,
2889 scratch,
2890 accelerator,
2891 timings,
2892 )?;
2893 for (component_index, wavelet) in group.into_iter().zip(group_wavelets) {
2894 let component = &components[component_index];
2895 let (x_rsiz, y_rsiz) = component_sampling[component_index];
2896 let actual_coefficients = flatten_integer_wavelet(&wavelet);
2897 precomputed_53[component_index] = Some(PrecomputedHtj2k53Component {
2898 x_rsiz,
2899 y_rsiz,
2900 dwt: j2k_dwt_from_integer_wavelet(&wavelet),
2901 });
2902
2903 if options.validate_against_float_reference {
2904 float_validation_actual.extend(actual_coefficients.clone());
2905 float_validation_expected.extend(float_reference_coefficients(
2906 component,
2907 decomposition_levels,
2908 scratch,
2909 )?);
2910 }
2911 if options.validate_against_integer_reference {
2912 integer_validation_actual.extend(actual_coefficients);
2913 integer_validation_expected.extend(integer_reference_coefficients(
2914 component,
2915 decomposition_levels,
2916 )?);
2917 }
2918 }
2919 }
2920
2921 let float_reference_metrics = if options.validate_against_float_reference {
2922 Some(error_metrics_i32(
2923 &float_validation_actual,
2924 &float_validation_expected,
2925 )?)
2926 } else {
2927 None
2928 };
2929 let integer_reference_metrics = if options.validate_against_integer_reference {
2930 Some(error_metrics_i32(
2931 &integer_validation_actual,
2932 &integer_validation_expected,
2933 )?)
2934 } else {
2935 None
2936 };
2937 let precomputed_components = precomputed_53
2938 .into_iter()
2939 .map(|component| {
2940 component.ok_or(JpegToHtj2kError::Validation(
2941 "integer transcode did not produce all components",
2942 ))
2943 })
2944 .collect::<Result<Vec<_>, _>>()?;
2945
2946 Ok(ComponentTranscodeBatch {
2947 precomputed_components: PrecomputedComponentBatch::Dwt53(precomputed_components),
2948 float_reference_metrics,
2949 integer_reference_metrics,
2950 })
2951}
2952
2953fn integer_wavelets_for_component_group(
2954 group: &[usize],
2955 components: &[JpegDctComponent],
2956 decomposition_levels: u8,
2957 scratch: &mut JpegToHtj2kScratch,
2958 accelerator: &mut impl DctToWaveletStageAccelerator,
2959 timings: &mut TranscodeTimingReport,
2960) -> Result<Vec<IntegerWavelet>, JpegToHtj2kError> {
2961 let jobs = group
2962 .iter()
2963 .map(|&component_index| integer_dct_job_for_component(&components[component_index]))
2964 .collect::<Result<Vec<_>, _>>()?;
2965 record_batch_attempt(timings, group.len());
2966 let accelerator_start = Instant::now();
2967 let accelerated_first_levels = accelerator
2968 .dct_grid_to_reversible_dwt53_batch(&jobs)
2969 .map_err(JpegToHtj2kError::Accelerator)?;
2970 timings.dct_to_wavelet_accelerator_us = timings
2971 .dct_to_wavelet_accelerator_us
2972 .saturating_add(accelerator_start.elapsed().as_micros());
2973
2974 if let Some(first_levels) = accelerated_first_levels {
2975 if first_levels.len() != group.len() {
2976 return Err(JpegToHtj2kError::Validation(
2977 "reversible 5/3 batch accelerator returned wrong component count",
2978 ));
2979 }
2980 timings.component_count = timings.component_count.saturating_add(group.len());
2981 record_accelerator_dispatch(timings, group.len());
2982 let decompose_start = Instant::now();
2983 let wavelets = first_levels
2984 .into_iter()
2985 .map(|first_level| integer_wavelet_from_first_level(first_level, decomposition_levels))
2986 .collect();
2987 timings.dwt_decompose_us = timings
2988 .dwt_decompose_us
2989 .saturating_add(decompose_start.elapsed().as_micros());
2990 return Ok(wavelets);
2991 }
2992
2993 group
2994 .iter()
2995 .map(|&component_index| {
2996 integer_direct_wavelet_from_component(
2997 &components[component_index],
2998 decomposition_levels,
2999 scratch,
3000 accelerator,
3001 timings,
3002 )
3003 })
3004 .collect()
3005}
3006
3007fn same_geometry_component_groups(components: &[JpegDctComponent]) -> Vec<Vec<usize>> {
3008 let mut assigned = vec![false; components.len()];
3009 let mut groups = Vec::new();
3010
3011 for component_index in 0..components.len() {
3012 if assigned[component_index] {
3013 continue;
3014 }
3015 assigned[component_index] = true;
3016 let mut group = vec![component_index];
3017 for candidate_index in component_index + 1..components.len() {
3018 if !assigned[candidate_index]
3019 && same_component_geometry(
3020 &components[component_index],
3021 &components[candidate_index],
3022 )
3023 {
3024 assigned[candidate_index] = true;
3025 group.push(candidate_index);
3026 }
3027 }
3028 groups.push(group);
3029 }
3030
3031 groups
3032}
3033
3034fn same_component_geometry(left: &JpegDctComponent, right: &JpegDctComponent) -> bool {
3035 left.width == right.width
3036 && left.height == right.height
3037 && left.block_cols == right.block_cols
3038 && left.block_rows == right.block_rows
3039}
3040
3041fn integer_dct_job_for_component(
3042 component: &JpegDctComponent,
3043) -> Result<DctGridToReversibleDwt53Job<'_>, JpegToHtj2kError> {
3044 validate_component_block_grid(component)?;
3045 Ok(DctGridToReversibleDwt53Job {
3046 dequantized_blocks: &component.dequantized_blocks,
3047 block_cols: component.block_cols as usize,
3048 block_rows: component.block_rows as usize,
3049 width: component.width as usize,
3050 height: component.height as usize,
3051 })
3052}
3053
3054#[allow(clippy::too_many_arguments)]
3055fn component_to_precomputed_htj2k(
3056 component: &JpegDctComponent,
3057 x_rsiz: u8,
3058 y_rsiz: u8,
3059 decomposition_levels: u8,
3060 options: &JpegToHtj2kOptions,
3061 scratch: &mut JpegToHtj2kScratch,
3062 accelerator: &mut impl DctToWaveletStageAccelerator,
3063 timings: &mut TranscodeTimingReport,
3064) -> Result<ComponentTranscodeResult, JpegToHtj2kError> {
3065 let (dwt, actual_coefficients) = match options.coefficient_path {
3066 JpegToHtj2kCoefficientPath::IntegerDirect53 => {
3067 let wavelet = integer_direct_wavelet_from_component(
3068 component,
3069 decomposition_levels,
3070 scratch,
3071 accelerator,
3072 timings,
3073 )?;
3074 (
3075 PrecomputedComponent::Dwt53(PrecomputedHtj2k53Component {
3076 x_rsiz,
3077 y_rsiz,
3078 dwt: j2k_dwt_from_integer_wavelet(&wavelet),
3079 }),
3080 flatten_integer_wavelet(&wavelet),
3081 )
3082 }
3083 JpegToHtj2kCoefficientPath::FloatDirectLinear53 => {
3084 let wavelet = float_direct_wavelet_from_component(
3085 component,
3086 decomposition_levels,
3087 scratch,
3088 accelerator,
3089 timings,
3090 )?;
3091 (
3092 PrecomputedComponent::Dwt53(PrecomputedHtj2k53Component {
3093 x_rsiz,
3094 y_rsiz,
3095 dwt: j2k_dwt_from_wavelet(
3096 &wavelet,
3097 component.width as usize,
3098 component.height as usize,
3099 ),
3100 }),
3101 rounded_wavelet_i32(&wavelet)?,
3102 )
3103 }
3104 JpegToHtj2kCoefficientPath::FloatDirectLinear97 => {
3105 let wavelet = float_direct_97_wavelet_from_component(
3106 component,
3107 decomposition_levels,
3108 scratch,
3109 accelerator,
3110 timings,
3111 )?;
3112 (
3113 PrecomputedComponent::Dwt97(PrecomputedHtj2k97Component {
3114 x_rsiz,
3115 y_rsiz,
3116 dwt: j2k_dwt97_from_wavelet(
3117 &wavelet,
3118 component.width as usize,
3119 component.height as usize,
3120 ),
3121 }),
3122 rounded_wavelet97_i32(&wavelet)?,
3123 )
3124 }
3125 };
3126 let float_validation_coefficients = if options.validate_against_float_reference {
3127 let expected = match options.coefficient_path {
3128 JpegToHtj2kCoefficientPath::FloatDirectLinear97 => {
3129 float97_reference_coefficients(component, decomposition_levels, scratch)?
3130 }
3131 JpegToHtj2kCoefficientPath::IntegerDirect53
3132 | JpegToHtj2kCoefficientPath::FloatDirectLinear53 => {
3133 float_reference_coefficients(component, decomposition_levels, scratch)?
3134 }
3135 };
3136 Some((actual_coefficients.clone(), expected))
3137 } else {
3138 None
3139 };
3140 let integer_validation_coefficients = if options.validate_against_integer_reference {
3141 let expected = integer_reference_coefficients(component, decomposition_levels)?;
3142 Some((actual_coefficients, expected))
3143 } else {
3144 None
3145 };
3146
3147 Ok(ComponentTranscodeResult {
3148 precomputed: dwt,
3149 float_validation_coefficients,
3150 integer_validation_coefficients,
3151 })
3152}
3153
3154fn transcode_path_name(
3155 all_unit_sampled: bool,
3156 coefficient_path: JpegToHtj2kCoefficientPath,
3157) -> &'static str {
3158 match (all_unit_sampled, coefficient_path) {
3159 (true, JpegToHtj2kCoefficientPath::IntegerDirect53) => {
3160 "full_resolution_components_integer_direct_53"
3161 }
3162 (false, JpegToHtj2kCoefficientPath::IntegerDirect53) => {
3163 "native_component_sampling_integer_direct_53"
3164 }
3165 (true, JpegToHtj2kCoefficientPath::FloatDirectLinear53) => {
3166 "full_resolution_components_float_direct_53"
3167 }
3168 (false, JpegToHtj2kCoefficientPath::FloatDirectLinear53) => {
3169 "native_component_sampling_float_direct_53"
3170 }
3171 (true, JpegToHtj2kCoefficientPath::FloatDirectLinear97) => {
3172 "full_resolution_components_float_direct_97"
3173 }
3174 (false, JpegToHtj2kCoefficientPath::FloatDirectLinear97) => {
3175 "native_component_sampling_float_direct_97"
3176 }
3177 }
3178}
3179
3180fn float_direct_wavelet_from_component(
3181 component: &JpegDctComponent,
3182 decomposition_levels: u8,
3183 scratch: &mut JpegToHtj2kScratch,
3184 accelerator: &mut impl DctToWaveletStageAccelerator,
3185 timings: &mut TranscodeTimingReport,
3186) -> Result<ComponentWavelet, JpegToHtj2kError> {
3187 timings.component_count = timings.component_count.saturating_add(1);
3188 let repack_start = Instant::now();
3189 dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3190 timings.jpeg_dct_repack_us = timings
3191 .jpeg_dct_repack_us
3192 .saturating_add(repack_start.elapsed().as_micros());
3193 let blocks = &scratch.dct_blocks_f64;
3194 let job = DctGridToDwt53Job {
3195 blocks,
3196 block_cols: component.block_cols as usize,
3197 block_rows: component.block_rows as usize,
3198 width: component.width as usize,
3199 height: component.height as usize,
3200 };
3201 record_accelerator_attempt(timings, 1);
3202 let accelerator_start = Instant::now();
3203 let accelerated = accelerator
3204 .dct_grid_to_dwt53(job)
3205 .map_err(JpegToHtj2kError::Accelerator)?;
3206 timings.dct_to_wavelet_accelerator_us = timings
3207 .dct_to_wavelet_accelerator_us
3208 .saturating_add(accelerator_start.elapsed().as_micros());
3209 let bands = if let Some(bands) = accelerated {
3210 record_accelerator_dispatch(timings, 1);
3211 bands
3212 } else {
3213 record_cpu_fallback(timings, 1);
3214 let fallback_start = Instant::now();
3215 let bands = dct8x8_blocks_to_dwt53_float_linear_with_scratch(
3216 blocks,
3217 component.block_cols as usize,
3218 component.block_rows as usize,
3219 component.width as usize,
3220 component.height as usize,
3221 &mut scratch.dct53_grid,
3222 )
3223 .map_err(dct53_grid_error)?;
3224 timings.dct_to_wavelet_cpu_fallback_us = timings
3225 .dct_to_wavelet_cpu_fallback_us
3226 .saturating_add(fallback_start.elapsed().as_micros());
3227 bands
3228 };
3229 let decompose_start = Instant::now();
3230 let wavelet = decompose_from_first_level(bands, usize::from(decomposition_levels));
3231 timings.dwt_decompose_us = timings
3232 .dwt_decompose_us
3233 .saturating_add(decompose_start.elapsed().as_micros());
3234 Ok(wavelet)
3235}
3236
3237fn float_direct_97_wavelet_from_component(
3238 component: &JpegDctComponent,
3239 decomposition_levels: u8,
3240 scratch: &mut JpegToHtj2kScratch,
3241 accelerator: &mut impl DctToWaveletStageAccelerator,
3242 timings: &mut TranscodeTimingReport,
3243) -> Result<ComponentWavelet97, JpegToHtj2kError> {
3244 timings.component_count = timings.component_count.saturating_add(1);
3245 let repack_start = Instant::now();
3246 dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3247 timings.jpeg_dct_repack_us = timings
3248 .jpeg_dct_repack_us
3249 .saturating_add(repack_start.elapsed().as_micros());
3250 let blocks = &scratch.dct_blocks_f64;
3251 let job = DctGridToDwt97Job {
3252 blocks,
3253 block_cols: component.block_cols as usize,
3254 block_rows: component.block_rows as usize,
3255 width: component.width as usize,
3256 height: component.height as usize,
3257 };
3258 record_accelerator_attempt(timings, 1);
3259 let accelerator_start = Instant::now();
3260 let accelerated = accelerator
3261 .dct_grid_to_dwt97(job)
3262 .map_err(JpegToHtj2kError::Accelerator)?;
3263 timings.dct_to_wavelet_accelerator_us = timings
3264 .dct_to_wavelet_accelerator_us
3265 .saturating_add(accelerator_start.elapsed().as_micros());
3266 let bands = if let Some(bands) = accelerated {
3267 record_accelerator_dispatch(timings, 1);
3268 bands
3269 } else {
3270 record_cpu_fallback(timings, 1);
3271 let fallback_start = Instant::now();
3272 let bands = dct8x8_blocks_then_dwt97_float_with_scratch(
3273 blocks,
3274 component.block_cols as usize,
3275 component.block_rows as usize,
3276 component.width as usize,
3277 component.height as usize,
3278 &mut scratch.dct97_grid,
3279 )
3280 .map_err(dct97_grid_error)?;
3281 timings.dct_to_wavelet_cpu_fallback_us = timings
3282 .dct_to_wavelet_cpu_fallback_us
3283 .saturating_add(fallback_start.elapsed().as_micros());
3284 bands
3285 };
3286 let decompose_start = Instant::now();
3287 let wavelet = decompose_97_from_first_level_with_scratch(
3288 bands,
3289 usize::from(decomposition_levels),
3290 &mut scratch.dct97_grid,
3291 );
3292 timings.dwt_decompose_us = timings
3293 .dwt_decompose_us
3294 .saturating_add(decompose_start.elapsed().as_micros());
3295 Ok(wavelet)
3296}
3297
3298fn float_reference_coefficients(
3299 component: &JpegDctComponent,
3300 decomposition_levels: u8,
3301 scratch: &mut JpegToHtj2kScratch,
3302) -> Result<Vec<i32>, JpegToHtj2kError> {
3303 dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3304 let blocks = &scratch.dct_blocks_f64;
3305 let first_reference_level = dct8x8_blocks_then_dwt53_float(
3306 blocks,
3307 component.block_cols as usize,
3308 component.block_rows as usize,
3309 component.width as usize,
3310 component.height as usize,
3311 )
3312 .map_err(dct53_grid_error)?;
3313 let reference =
3314 decompose_from_first_level(first_reference_level, usize::from(decomposition_levels));
3315 rounded_wavelet_i32(&reference)
3316}
3317
3318fn float97_reference_coefficients(
3319 component: &JpegDctComponent,
3320 decomposition_levels: u8,
3321 scratch: &mut JpegToHtj2kScratch,
3322) -> Result<Vec<i32>, JpegToHtj2kError> {
3323 dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3324 let blocks = &scratch.dct_blocks_f64;
3325 let first_reference_level = dct8x8_blocks_then_dwt97_float(
3326 blocks,
3327 component.block_cols as usize,
3328 component.block_rows as usize,
3329 component.width as usize,
3330 component.height as usize,
3331 )
3332 .map_err(dct97_grid_error)?;
3333 let reference =
3334 decompose_97_from_first_level(first_reference_level, usize::from(decomposition_levels));
3335 rounded_wavelet97_i32(&reference)
3336}
3337
3338fn decompose_from_first_level(
3339 first_level: Dwt53TwoDimensional<f64>,
3340 decomposition_levels: usize,
3341) -> ComponentWavelet {
3342 let mut wavelet = ComponentWavelet {
3343 final_ll: first_level.ll.clone(),
3344 final_ll_width: first_level.low_width,
3345 final_ll_height: first_level.low_height,
3346 levels: vec![first_level],
3347 };
3348
3349 while wavelet.levels.len() < decomposition_levels {
3350 let next = linearized_53_2d_from_plane(
3351 &wavelet.final_ll,
3352 wavelet.final_ll_width,
3353 wavelet.final_ll_height,
3354 );
3355 wavelet.final_ll.clone_from(&next.ll);
3356 wavelet.final_ll_width = next.low_width;
3357 wavelet.final_ll_height = next.low_height;
3358 wavelet.levels.push(next);
3359 }
3360
3361 wavelet
3362}
3363
3364fn decompose_97_from_first_level(
3365 first_level: Dwt97TwoDimensional<f64>,
3366 decomposition_levels: usize,
3367) -> ComponentWavelet97 {
3368 let mut scratch = Dct97GridScratch::default();
3369 decompose_97_from_first_level_with_scratch(first_level, decomposition_levels, &mut scratch)
3370}
3371
3372fn decompose_97_from_first_level_with_scratch(
3373 first_level: Dwt97TwoDimensional<f64>,
3374 decomposition_levels: usize,
3375 scratch: &mut Dct97GridScratch,
3376) -> ComponentWavelet97 {
3377 let mut wavelet = ComponentWavelet97 {
3378 final_ll: first_level.ll.clone(),
3379 final_ll_width: first_level.low_width,
3380 final_ll_height: first_level.low_height,
3381 levels: vec![first_level],
3382 };
3383
3384 while wavelet.levels.len() < decomposition_levels {
3385 let next = linearized_97_2d_from_plane_with_scratch(
3386 &wavelet.final_ll,
3387 wavelet.final_ll_width,
3388 wavelet.final_ll_height,
3389 scratch,
3390 );
3391 wavelet.final_ll.clone_from(&next.ll);
3392 wavelet.final_ll_width = next.low_width;
3393 wavelet.final_ll_height = next.low_height;
3394 wavelet.levels.push(next);
3395 }
3396
3397 wavelet
3398}
3399
3400fn j2k_dwt_from_wavelet(
3401 wavelet: &ComponentWavelet,
3402 width: usize,
3403 height: usize,
3404) -> J2kForwardDwt53Output {
3405 let mut current_width = width;
3406 let mut current_height = height;
3407 let mut levels = Vec::with_capacity(wavelet.levels.len());
3408
3409 for level in &wavelet.levels {
3410 levels.push(J2kForwardDwt53Level {
3411 hl: level.hl.iter().map(|&value| value as f32).collect(),
3412 lh: level.lh.iter().map(|&value| value as f32).collect(),
3413 hh: level.hh.iter().map(|&value| value as f32).collect(),
3414 width: current_width as u32,
3415 height: current_height as u32,
3416 low_width: level.low_width as u32,
3417 low_height: level.low_height as u32,
3418 high_width: level.high_width as u32,
3419 high_height: level.high_height as u32,
3420 });
3421 current_width = level.low_width;
3422 current_height = level.low_height;
3423 }
3424 levels.reverse();
3425
3426 J2kForwardDwt53Output {
3427 ll: wavelet.final_ll.iter().map(|&value| value as f32).collect(),
3428 ll_width: wavelet.final_ll_width as u32,
3429 ll_height: wavelet.final_ll_height as u32,
3430 levels,
3431 }
3432}
3433
3434fn j2k_dwt97_from_wavelet(
3435 wavelet: &ComponentWavelet97,
3436 width: usize,
3437 height: usize,
3438) -> J2kForwardDwt97Output {
3439 let mut current_width = width;
3440 let mut current_height = height;
3441 let mut levels = Vec::with_capacity(wavelet.levels.len());
3442
3443 for level in &wavelet.levels {
3444 levels.push(J2kForwardDwt97Level {
3445 hl: level.hl.iter().map(|&value| value as f32).collect(),
3446 lh: level.lh.iter().map(|&value| value as f32).collect(),
3447 hh: level.hh.iter().map(|&value| value as f32).collect(),
3448 width: current_width as u32,
3449 height: current_height as u32,
3450 low_width: level.low_width as u32,
3451 low_height: level.low_height as u32,
3452 high_width: level.high_width as u32,
3453 high_height: level.high_height as u32,
3454 });
3455 current_width = level.low_width;
3456 current_height = level.low_height;
3457 }
3458 levels.reverse();
3459
3460 J2kForwardDwt97Output {
3461 ll: wavelet.final_ll.iter().map(|&value| value as f32).collect(),
3462 ll_width: wavelet.final_ll_width as u32,
3463 ll_height: wavelet.final_ll_height as u32,
3464 levels,
3465 }
3466}
3467
3468fn j2k_dwt_from_integer_wavelet(wavelet: &IntegerWavelet) -> J2kForwardDwt53Output {
3469 let mut levels = Vec::with_capacity(wavelet.levels.len());
3470 for level in &wavelet.levels {
3471 levels.push(J2kForwardDwt53Level {
3472 hl: level.hl.iter().map(|&value| value as f32).collect(),
3473 lh: level.lh.iter().map(|&value| value as f32).collect(),
3474 hh: level.hh.iter().map(|&value| value as f32).collect(),
3475 width: level.width as u32,
3476 height: level.height as u32,
3477 low_width: level.low_width as u32,
3478 low_height: level.low_height as u32,
3479 high_width: level.high_width as u32,
3480 high_height: level.high_height as u32,
3481 });
3482 }
3483 levels.reverse();
3484
3485 J2kForwardDwt53Output {
3486 ll: wavelet.final_ll.iter().map(|&value| value as f32).collect(),
3487 ll_width: wavelet.final_ll_width as u32,
3488 ll_height: wavelet.final_ll_height as u32,
3489 levels,
3490 }
3491}
3492
3493fn rounded_wavelet_i32(wavelet: &ComponentWavelet) -> Result<Vec<i32>, JpegToHtj2kError> {
3494 let coefficient_count = wavelet.final_ll.len()
3495 + wavelet
3496 .levels
3497 .iter()
3498 .map(|level| level.hl.len() + level.lh.len() + level.hh.len())
3499 .sum::<usize>();
3500 let mut output = Vec::with_capacity(coefficient_count);
3501 append_rounded_i32(&wavelet.final_ll, &mut output)?;
3502 for level in wavelet.levels.iter().rev() {
3503 append_rounded_i32(&level.hl, &mut output)?;
3504 append_rounded_i32(&level.lh, &mut output)?;
3505 append_rounded_i32(&level.hh, &mut output)?;
3506 }
3507 Ok(output)
3508}
3509
3510fn rounded_wavelet97_i32(wavelet: &ComponentWavelet97) -> Result<Vec<i32>, JpegToHtj2kError> {
3511 let coefficient_count = wavelet.final_ll.len()
3512 + wavelet
3513 .levels
3514 .iter()
3515 .map(|level| level.hl.len() + level.lh.len() + level.hh.len())
3516 .sum::<usize>();
3517 let mut output = Vec::with_capacity(coefficient_count);
3518 append_rounded_i32(&wavelet.final_ll, &mut output)?;
3519 for level in wavelet.levels.iter().rev() {
3520 append_rounded_i32(&level.hl, &mut output)?;
3521 append_rounded_i32(&level.lh, &mut output)?;
3522 append_rounded_i32(&level.hh, &mut output)?;
3523 }
3524 Ok(output)
3525}
3526
3527fn integer_direct_wavelet_from_component(
3528 component: &JpegDctComponent,
3529 decomposition_levels: u8,
3530 scratch: &mut JpegToHtj2kScratch,
3531 accelerator: &mut impl DctToWaveletStageAccelerator,
3532 timings: &mut TranscodeTimingReport,
3533) -> Result<IntegerWavelet, JpegToHtj2kError> {
3534 let job = integer_dct_job_for_component(component)?;
3535 timings.component_count = timings.component_count.saturating_add(1);
3536 record_accelerator_attempt(timings, 1);
3537 let accelerator_start = Instant::now();
3538 let accelerated_first_level = accelerator
3539 .dct_grid_to_reversible_dwt53(job)
3540 .map_err(JpegToHtj2kError::Accelerator)?;
3541 timings.dct_to_wavelet_accelerator_us = timings
3542 .dct_to_wavelet_accelerator_us
3543 .saturating_add(accelerator_start.elapsed().as_micros());
3544 if let Some(first_level) = accelerated_first_level {
3545 record_accelerator_dispatch(timings, 1);
3546 let decompose_start = Instant::now();
3547 let wavelet = integer_wavelet_from_first_level(first_level, decomposition_levels);
3548 timings.dwt_decompose_us = timings
3549 .dwt_decompose_us
3550 .saturating_add(decompose_start.elapsed().as_micros());
3551 return Ok(wavelet);
3552 }
3553
3554 scratch.integer_idct_blocks.clear();
3555 scratch
3556 .integer_idct_blocks
3557 .resize_with(component.dequantized_blocks.len(), || None);
3558 record_cpu_fallback(timings, 1);
3559 let fallback_start = Instant::now();
3560 let (final_ll, final_ll_width, final_ll_height, first_level) =
3561 integer_direct_first_level_from_component(
3562 component,
3563 &mut scratch.integer_idct_blocks,
3564 &mut scratch.integer_row,
3565 )?;
3566 timings.dct_to_wavelet_cpu_fallback_us = timings
3567 .dct_to_wavelet_cpu_fallback_us
3568 .saturating_add(fallback_start.elapsed().as_micros());
3569 let decompose_start = Instant::now();
3570 let wavelet = integer_wavelet_from_first_parts(
3571 final_ll,
3572 final_ll_width,
3573 final_ll_height,
3574 first_level,
3575 decomposition_levels,
3576 );
3577 timings.dwt_decompose_us = timings
3578 .dwt_decompose_us
3579 .saturating_add(decompose_start.elapsed().as_micros());
3580 Ok(wavelet)
3581}
3582
3583fn integer_wavelet_from_first_level(
3584 first_level: ReversibleDwt53FirstLevel,
3585 decomposition_levels: u8,
3586) -> IntegerWavelet {
3587 let (final_ll, final_ll_width, final_ll_height, first_level) =
3588 integer_wavelet_first_level_from_accelerated(first_level);
3589 integer_wavelet_from_first_parts(
3590 final_ll,
3591 final_ll_width,
3592 final_ll_height,
3593 first_level,
3594 decomposition_levels,
3595 )
3596}
3597
3598fn integer_wavelet_from_first_parts(
3599 mut final_ll: Vec<i32>,
3600 mut final_ll_width: usize,
3601 mut final_ll_height: usize,
3602 first_level: IntegerWaveletLevel,
3603 decomposition_levels: u8,
3604) -> IntegerWavelet {
3605 let mut levels = vec![first_level];
3606
3607 let remaining_levels = usize::from(decomposition_levels.saturating_sub(1));
3608 if remaining_levels > 0 {
3609 let tail =
3610 reversible_dwt53_i32(final_ll, final_ll_width, final_ll_height, remaining_levels);
3611 final_ll = tail.final_ll;
3612 final_ll_width = tail.final_ll_width;
3613 final_ll_height = tail.final_ll_height;
3614 levels.extend(tail.levels);
3615 }
3616
3617 IntegerWavelet {
3618 final_ll,
3619 final_ll_width,
3620 final_ll_height,
3621 levels,
3622 }
3623}
3624
3625fn integer_wavelet_first_level_from_accelerated(
3626 first_level: ReversibleDwt53FirstLevel,
3627) -> (Vec<i32>, usize, usize, IntegerWaveletLevel) {
3628 let level = IntegerWaveletLevel {
3629 width: first_level.low_width + first_level.high_width,
3630 height: first_level.low_height + first_level.high_height,
3631 low_width: first_level.low_width,
3632 low_height: first_level.low_height,
3633 high_width: first_level.high_width,
3634 high_height: first_level.high_height,
3635 hl: first_level.hl,
3636 lh: first_level.lh,
3637 hh: first_level.hh,
3638 };
3639 (
3640 first_level.ll,
3641 first_level.low_width,
3642 first_level.low_height,
3643 level,
3644 )
3645}
3646
3647fn integer_direct_first_level_from_component(
3648 component: &JpegDctComponent,
3649 idct_blocks: &mut [Option<[i32; 64]>],
3650 row: &mut Vec<i32>,
3651) -> Result<(Vec<i32>, usize, usize, IntegerWaveletLevel), JpegToHtj2kError> {
3652 let width = component.width as usize;
3653 let height = component.height as usize;
3654 let low_width = width.div_ceil(2);
3655 let low_height = height.div_ceil(2);
3656 let high_width = width / 2;
3657 let high_height = height / 2;
3658
3659 let mut ll = Vec::with_capacity(low_width * low_height);
3660 let mut hl = Vec::with_capacity(high_width * low_height);
3661 let mut lh = Vec::with_capacity(low_width * high_height);
3662 let mut hh = Vec::with_capacity(high_width * high_height);
3663 row.clear();
3664 if row.capacity() < width {
3665 row.reserve(width - row.capacity());
3666 }
3667
3668 for output_y in 0..low_height {
3669 row.clear();
3670 for x in 0..width {
3671 row.push(vertical_53_i32_at(
3672 component,
3673 idct_blocks,
3674 x,
3675 output_y,
3676 true,
3677 )?);
3678 }
3679 reversible_lift_53_i32(row);
3680 ll.extend(row.iter().step_by(2).copied());
3681 hl.extend(row.iter().skip(1).step_by(2).copied());
3682 }
3683
3684 for output_y in 0..high_height {
3685 row.clear();
3686 for x in 0..width {
3687 row.push(vertical_53_i32_at(
3688 component,
3689 idct_blocks,
3690 x,
3691 output_y,
3692 false,
3693 )?);
3694 }
3695 reversible_lift_53_i32(row);
3696 lh.extend(row.iter().step_by(2).copied());
3697 hh.extend(row.iter().skip(1).step_by(2).copied());
3698 }
3699
3700 let level = IntegerWaveletLevel {
3701 width,
3702 height,
3703 low_width,
3704 low_height,
3705 high_width,
3706 high_height,
3707 hl,
3708 lh,
3709 hh,
3710 };
3711
3712 Ok((ll, low_width, low_height, level))
3713}
3714
3715fn vertical_53_i32_at(
3716 component: &JpegDctComponent,
3717 idct_blocks: &mut [Option<[i32; 64]>],
3718 x: usize,
3719 output_y: usize,
3720 low_pass: bool,
3721) -> Result<i32, JpegToHtj2kError> {
3722 if low_pass {
3723 vertical_low_53_i32_at(component, idct_blocks, x, output_y)
3724 } else {
3725 vertical_high_53_i32_at(component, idct_blocks, x, output_y)
3726 }
3727}
3728
3729fn vertical_low_53_i32_at(
3730 component: &JpegDctComponent,
3731 idct_blocks: &mut [Option<[i32; 64]>],
3732 x: usize,
3733 low_idx: usize,
3734) -> Result<i32, JpegToHtj2kError> {
3735 let height = component.height as usize;
3736 reversible_lift_53_low_at_fallible(height, low_idx, |y| {
3737 component_sample_i32(component, idct_blocks, x, y)
3738 })
3739}
3740
3741fn vertical_high_53_i32_at(
3742 component: &JpegDctComponent,
3743 idct_blocks: &mut [Option<[i32; 64]>],
3744 x: usize,
3745 high_idx: usize,
3746) -> Result<i32, JpegToHtj2kError> {
3747 let height = component.height as usize;
3748 reversible_lift_53_high_at_fallible(height, high_idx, |y| {
3749 component_sample_i32(component, idct_blocks, x, y)
3750 })
3751}
3752
3753fn component_sample_i32(
3754 component: &JpegDctComponent,
3755 idct_blocks: &mut [Option<[i32; 64]>],
3756 x: usize,
3757 y: usize,
3758) -> Result<i32, JpegToHtj2kError> {
3759 if x >= component.width as usize || y >= component.height as usize {
3760 return Err(JpegToHtj2kError::Validation(
3761 "component sample coordinate exceeds dimensions",
3762 ));
3763 }
3764 let block_cols = component.block_cols as usize;
3765 let block_x = x / 8;
3766 let block_y = y / 8;
3767 let block_idx = block_y * block_cols + block_x;
3768 let block = component
3769 .dequantized_blocks
3770 .get(block_idx)
3771 .ok_or(JpegToHtj2kError::Validation(
3772 "component block grid does not cover requested sample",
3773 ))?;
3774 let cached = idct_blocks
3775 .get_mut(block_idx)
3776 .ok_or(JpegToHtj2kError::Validation(
3777 "integer IDCT cache does not cover requested block",
3778 ))?;
3779 let block_samples = cached.get_or_insert_with(|| {
3780 let decoded = idct_islow_block(block);
3781 decoded.map(|sample| i32::from(sample) - 128)
3782 });
3783 let local_idx = (y % 8) * 8 + (x % 8);
3784 Ok(block_samples[local_idx])
3785}
3786
3787fn integer_reference_coefficients(
3788 component: &JpegDctComponent,
3789 decomposition_levels: u8,
3790) -> Result<Vec<i32>, JpegToHtj2kError> {
3791 let samples = idct_component_samples_i32(component)?;
3792 let wavelet = reversible_dwt53_i32(
3793 samples,
3794 component.width as usize,
3795 component.height as usize,
3796 usize::from(decomposition_levels),
3797 );
3798 Ok(flatten_integer_wavelet(&wavelet))
3799}
3800
3801fn idct_component_samples_i32(component: &JpegDctComponent) -> Result<Vec<i32>, JpegToHtj2kError> {
3802 validate_component_block_grid(component)?;
3803
3804 let width = component.width as usize;
3805 let height = component.height as usize;
3806 let block_cols = component.block_cols as usize;
3807 let block_rows = component.block_rows as usize;
3808 let mut samples = vec![0; width * height];
3809 for block_y in 0..block_rows {
3810 for block_x in 0..block_cols {
3811 let block = &component.dequantized_blocks[block_y * block_cols + block_x];
3812 let block_samples = idct_islow_block(block);
3813 for local_y in 0..8 {
3814 let y = block_y * 8 + local_y;
3815 if y >= height {
3816 continue;
3817 }
3818 for local_x in 0..8 {
3819 let x = block_x * 8 + local_x;
3820 if x >= width {
3821 continue;
3822 }
3823 samples[y * width + x] = i32::from(block_samples[local_y * 8 + local_x]) - 128;
3824 }
3825 }
3826 }
3827 }
3828
3829 Ok(samples)
3830}
3831
3832fn validate_component_block_grid(component: &JpegDctComponent) -> Result<(), JpegToHtj2kError> {
3833 let block_cols = component.block_cols as usize;
3834 let block_rows = component.block_rows as usize;
3835 let expected_blocks =
3836 block_cols
3837 .checked_mul(block_rows)
3838 .ok_or(JpegToHtj2kError::Validation(
3839 "component block grid overflow",
3840 ))?;
3841 if component.dequantized_blocks.len() != expected_blocks {
3842 return Err(JpegToHtj2kError::Validation(
3843 "component block count does not match block grid",
3844 ));
3845 }
3846
3847 Ok(())
3848}
3849
3850fn reversible_dwt53_i32(
3851 mut buffer: Vec<i32>,
3852 width: usize,
3853 height: usize,
3854 decomposition_levels: usize,
3855) -> IntegerWavelet {
3856 let mut current_width = width;
3857 let mut current_height = height;
3858 let mut levels = Vec::with_capacity(decomposition_levels);
3859
3860 for _ in 0..decomposition_levels {
3861 for x in 0..current_width {
3862 let mut column = Vec::with_capacity(current_height);
3863 for y in 0..current_height {
3864 column.push(buffer[y * width + x]);
3865 }
3866 reversible_lift_53_i32(&mut column);
3867 let low_len = current_height.div_ceil(2);
3868 for (idx, value) in column.iter().step_by(2).copied().enumerate() {
3869 buffer[idx * width + x] = value;
3870 }
3871 for (idx, value) in column.iter().skip(1).step_by(2).copied().enumerate() {
3872 buffer[(low_len + idx) * width + x] = value;
3873 }
3874 }
3875
3876 for y in 0..current_height {
3877 let row_start = y * width;
3878 let mut row = buffer[row_start..row_start + current_width].to_vec();
3879 reversible_lift_53_i32(&mut row);
3880 let low_len = current_width.div_ceil(2);
3881 for (idx, value) in row.iter().step_by(2).copied().enumerate() {
3882 buffer[row_start + idx] = value;
3883 }
3884 for (idx, value) in row.iter().skip(1).step_by(2).copied().enumerate() {
3885 buffer[row_start + low_len + idx] = value;
3886 }
3887 }
3888
3889 let low_width = current_width.div_ceil(2);
3890 let low_height = current_height.div_ceil(2);
3891 let high_width = current_width / 2;
3892 let high_height = current_height / 2;
3893 let mut hl = Vec::with_capacity(high_width * low_height);
3894 let mut lh = Vec::with_capacity(low_width * high_height);
3895 let mut hh = Vec::with_capacity(high_width * high_height);
3896
3897 for y in 0..low_height {
3898 for x in 0..high_width {
3899 hl.push(buffer[y * width + low_width + x]);
3900 }
3901 }
3902 for y in 0..high_height {
3903 for x in 0..low_width {
3904 lh.push(buffer[(low_height + y) * width + x]);
3905 }
3906 }
3907 for y in 0..high_height {
3908 for x in 0..high_width {
3909 hh.push(buffer[(low_height + y) * width + low_width + x]);
3910 }
3911 }
3912
3913 levels.push(IntegerWaveletLevel {
3914 width: current_width,
3915 height: current_height,
3916 low_width,
3917 low_height,
3918 high_width,
3919 high_height,
3920 hl,
3921 lh,
3922 hh,
3923 });
3924 current_width = low_width;
3925 current_height = low_height;
3926 }
3927
3928 let mut final_ll = Vec::with_capacity(current_width * current_height);
3929 for y in 0..current_height {
3930 for x in 0..current_width {
3931 final_ll.push(buffer[y * width + x]);
3932 }
3933 }
3934
3935 IntegerWavelet {
3936 final_ll,
3937 final_ll_width: current_width,
3938 final_ll_height: current_height,
3939 levels,
3940 }
3941}
3942
3943fn flatten_integer_wavelet(wavelet: &IntegerWavelet) -> Vec<i32> {
3944 let coefficient_count = wavelet.final_ll.len()
3945 + wavelet
3946 .levels
3947 .iter()
3948 .map(|level| level.hl.len() + level.lh.len() + level.hh.len())
3949 .sum::<usize>();
3950 let mut output = Vec::with_capacity(coefficient_count);
3951 output.extend_from_slice(&wavelet.final_ll);
3952 for level in wavelet.levels.iter().rev() {
3953 output.extend_from_slice(&level.hl);
3954 output.extend_from_slice(&level.lh);
3955 output.extend_from_slice(&level.hh);
3956 }
3957 output
3958}
3959
3960fn append_rounded_i32(values: &[f64], output: &mut Vec<i32>) -> Result<(), JpegToHtj2kError> {
3961 for &value in values {
3962 output.push(round_f64_to_i32(value)?);
3963 }
3964 Ok(())
3965}
3966
3967fn round_f64_to_i32(value: f64) -> Result<i32, JpegToHtj2kError> {
3968 let rounded = value.round();
3969 if !rounded.is_finite() {
3970 return Err(JpegToHtj2kError::Validation(
3971 "float reference coefficient is not finite",
3972 ));
3973 }
3974 if rounded < f64::from(i32::MIN) || rounded > f64::from(i32::MAX) {
3975 return Err(JpegToHtj2kError::Validation(
3976 "float reference coefficient exceeds i32 range",
3977 ));
3978 }
3979 Ok(rounded as i32)
3980}
3981
3982fn decomposition_levels_for_components(
3983 components: &[JpegDctComponent],
3984 requested_levels: u8,
3985) -> Result<u8, JpegToHtj2kError> {
3986 if requested_levels == 0 {
3987 return Err(JpegToHtj2kError::Unsupported(
3988 "jpeg_to_htj2k requires at least one decomposition level",
3989 ));
3990 }
3991
3992 let available_levels = components
3993 .iter()
3994 .map(|component| available_decomposition_levels(component.width, component.height))
3995 .min()
3996 .ok_or(JpegToHtj2kError::Unsupported("missing JPEG components"))?;
3997 let decomposition_levels = requested_levels.min(available_levels);
3998 if decomposition_levels == 0 {
3999 return Err(JpegToHtj2kError::Unsupported(
4000 "component dimensions are too small for a DWT decomposition",
4001 ));
4002 }
4003
4004 Ok(decomposition_levels)
4005}
4006
/// Returns the number of decomposition levels a `width` x `height` plane
/// supports: the floor of log2 of the smaller dimension, or `0` when that
/// dimension is `0` or `1`.
fn available_decomposition_levels(width: u32, height: u32) -> u8 {
    // `checked_ilog2` is `None` for 0 and `Some(0)` for 1; the result is at
    // most 31, so narrowing to `u8` cannot truncate.
    width
        .min(height)
        .checked_ilog2()
        .map_or(0, |levels| levels as u8)
}
4015
4016fn component_sampling_for_jpeg(
4017 components: &[JpegDctComponent],
4018 reference_width: u32,
4019 reference_height: u32,
4020) -> Result<Vec<(u8, u8)>, JpegToHtj2kError> {
4021 let max_h = components
4022 .iter()
4023 .map(|component| component.h_samp)
4024 .max()
4025 .ok_or(JpegToHtj2kError::Unsupported("missing JPEG components"))?;
4026 let max_v = components
4027 .iter()
4028 .map(|component| component.v_samp)
4029 .max()
4030 .ok_or(JpegToHtj2kError::Unsupported("missing JPEG components"))?;
4031
4032 components
4033 .iter()
4034 .map(|component| {
4035 if component.h_samp == 0 || component.v_samp == 0 {
4036 return Err(JpegToHtj2kError::Unsupported(
4037 "JPEG component sampling factors must be non-zero",
4038 ));
4039 }
4040 if max_h % component.h_samp != 0 || max_v % component.v_samp != 0 {
4041 return Err(JpegToHtj2kError::Unsupported(
4042 "fractional JPEG component sampling is not supported",
4043 ));
4044 }
4045
4046 let x_rsiz = max_h / component.h_samp;
4047 let y_rsiz = max_v / component.v_samp;
4048 let expected_width = reference_width.div_ceil(u32::from(x_rsiz));
4049 let expected_height = reference_height.div_ceil(u32::from(y_rsiz));
4050 if component.width != expected_width || component.height != expected_height {
4051 return Err(JpegToHtj2kError::Unsupported(
4052 "JPEG component dimensions do not match derived SIZ sampling",
4053 ));
4054 }
4055
4056 Ok((x_rsiz, y_rsiz))
4057 })
4058 .collect()
4059}
4060
/// Replaces the contents of `output` with `blocks` converted to 8x8 `f64`
/// grids.
///
/// Coefficient `idx` of a 64-entry block lands at row `idx / 8`, column
/// `idx % 8`. Any previous contents of `output` are discarded; its
/// allocation is reused.
fn dct_blocks_to_8x8_f64_into(blocks: &[[i16; 64]], output: &mut Vec<[[f64; 8]; 8]>) {
    output.clear();
    output.extend(blocks.iter().map(|block| {
        let mut grid = [[0.0_f64; 8]; 8];
        // Eight consecutive coefficients form one grid row.
        for (grid_row, coefficients) in grid.iter_mut().zip(block.chunks_exact(8)) {
            for (cell, &coefficient) in grid_row.iter_mut().zip(coefficients) {
                *cell = f64::from(coefficient);
            }
        }
        grid
    }));
}
4072
4073fn dct_blocks_to_8x8_f64(blocks: &[[i16; 64]]) -> Vec<[[f64; 8]; 8]> {
4074 let mut output = Vec::with_capacity(blocks.len());
4075 dct_blocks_to_8x8_f64_into(blocks, &mut output);
4076 output
4077}
4078
4079#[cfg(test)]
4080mod tests {
4081 use super::*;
4082 use crate::accelerator::{
4083 DctGridI16ToHtj2k97CodeBlockBatch, PreencodedHtj2k97CodeBlock,
4084 PreencodedHtj2k97CompactCodeBlock, PreencodedHtj2k97CompactComponent,
4085 PreencodedHtj2k97CompactResolution, PreencodedHtj2k97CompactSubband,
4086 PreencodedHtj2k97Resolution, PreencodedHtj2k97Subband,
4087 };
4088 use j2k::adapter::encode_stage::{EncodedHtJ2kCodeBlock, J2kHtCodeBlockEncodeJob};
4089 use j2k_jpeg::transcode::JpegDctCodingMode;
4090 use j2k_jpeg::ColorSpace;
4091
/// Checks that `add_assign` on `TranscodeTimingReport` clamps near-maximum
/// counters at their type maximum and sums the remaining counters normally.
#[test]
fn timing_report_add_assign_saturates_and_adds_all_counter_kinds() {
    // The first two counters start one below their limits, so adding 10 must
    // saturate instead of wrapping.
    let baseline = TranscodeTimingReport {
        source_raw_probe_us: u128::MAX - 1,
        dwt97_batch_ht_codeblock_dispatches: usize::MAX - 1,
        tile_count: 2,
        accelerator_jobs: 3,
        cpu_fallback_jobs: 4,
        ..TranscodeTimingReport::default()
    };
    let increment = TranscodeTimingReport {
        source_raw_probe_us: 10,
        dwt97_batch_ht_codeblock_dispatches: 10,
        tile_count: 5,
        accelerator_jobs: 7,
        cpu_fallback_jobs: 11,
        ..TranscodeTimingReport::default()
    };

    let mut report = baseline;
    report.add_assign(increment);

    assert_eq!(report.source_raw_probe_us, u128::MAX);
    assert_eq!(report.dwt97_batch_ht_codeblock_dispatches, usize::MAX);
    assert_eq!(report.tile_count, 7);
    assert_eq!(report.accelerator_jobs, 10);
    assert_eq!(report.cpu_fallback_jobs, 15);
}
4117
/// Test accelerator that records how the i16 pre-encoded batch entry points
/// are invoked.
#[derive(Default)]
struct GroupedI16Accelerator {
    /// Number of calls to `dct_grid_i16_to_htj2k97_preencoded_batch_groups`.
    grouped_calls: usize,
    /// Number of calls to `dct_grid_i16_to_htj2k97_preencoded_batch`.
    single_calls: usize,
    /// One entry per grouped call, listing the job count of every group offered.
    grouped_lengths: Vec<Vec<usize>>,
}
4124
4125 impl DctToWaveletStageAccelerator for GroupedI16Accelerator {
4126 fn supports_htj2k97_i16_preencoded_batch(&self) -> bool {
4127 true
4128 }
4129
4130 fn dct_grid_i16_to_htj2k97_preencoded_batch(
4131 &mut self,
4132 jobs: &[DctGridI16ToHtj2k97CodeBlockJob<'_>],
4133 _options: Htj2k97CodeBlockOptions,
4134 ) -> Result<Option<Vec<PreencodedHtj2k97Component>>, TranscodeStageError> {
4135 self.single_calls = self.single_calls.saturating_add(1);
4136 Ok(Some(
4137 jobs.iter()
4138 .map(|job| dummy_preencoded_component(job.x_rsiz, job.y_rsiz))
4139 .collect(),
4140 ))
4141 }
4142
4143 fn dct_grid_i16_to_htj2k97_preencoded_batch_groups(
4144 &mut self,
4145 groups: &[DctGridI16ToHtj2k97CodeBlockBatch<'_, '_>],
4146 _options: Htj2k97CodeBlockOptions,
4147 ) -> Result<Option<Vec<Vec<PreencodedHtj2k97Component>>>, TranscodeStageError> {
4148 self.grouped_calls = self.grouped_calls.saturating_add(1);
4149 self.grouped_lengths
4150 .push(groups.iter().map(|group| group.jobs.len()).collect());
4151 Ok(Some(
4152 groups
4153 .iter()
4154 .map(|group| {
4155 group
4156 .jobs
4157 .iter()
4158 .map(|job| dummy_preencoded_component(job.x_rsiz, job.y_rsiz))
4159 .collect()
4160 })
4161 .collect(),
4162 ))
4163 }
4164 }
4165
/// Checks that a single three-component tile is offered to the accelerator in
/// one grouped call (groups of 1 and 2 jobs) and never through the ungrouped
/// entry point, and that every component ends up pre-encoded.
#[test]
fn float97_batch_offers_i16_preencoded_geometry_groups_together() {
    let mut tiles = vec![test_float97_tile()];
    let mut recorder = GroupedI16Accelerator::default();
    let mut scratch = JpegToHtj2kScratch::default();
    let mut timings = TranscodeTimingReport::default();
    let options = JpegToHtj2kOptions::lossy_97();

    let (batches, jobs) = transform_float97_batch_tiles(
        &mut tiles,
        &options,
        &mut scratch,
        &mut recorder,
        &mut timings,
    )
    .expect("grouped i16 preencoded transform");

    assert_eq!(batches, 2);
    assert_eq!(jobs, 3);
    assert_eq!(recorder.grouped_calls, 1);
    assert_eq!(recorder.single_calls, 0);
    assert_eq!(recorder.grouped_lengths, vec![vec![1, 2]]);

    let tile = &tiles[0];
    assert!(tile
        .preencoded_components
        .iter()
        .all(|component| component.is_some()));
}
4190
/// Test encode accelerator that only counts how it is invoked; its HT
/// code-block hooks return `Ok(None)` instead of encoded data.
#[derive(Default)]
struct CountingHtBatchEncodeAccelerator {
    /// Number of `encode_ht_code_blocks` (batch) calls.
    batches: usize,
    /// Total number of jobs seen across all batch calls.
    jobs: usize,
    /// Number of `encode_ht_code_block` (single-block) calls.
    single_blocks: usize,
}
4197
4198 impl J2kEncodeStageAccelerator for CountingHtBatchEncodeAccelerator {
4199 fn encode_ht_code_blocks(
4200 &mut self,
4201 jobs: &[J2kHtCodeBlockEncodeJob<'_>],
4202 ) -> Result<Option<Vec<EncodedHtJ2kCodeBlock>>, &'static str> {
4203 self.batches = self.batches.saturating_add(1);
4204 self.jobs = self.jobs.saturating_add(jobs.len());
4205 Ok(None)
4206 }
4207
4208 fn encode_ht_code_block(
4209 &mut self,
4210 _job: J2kHtCodeBlockEncodeJob<'_>,
4211 ) -> Result<Option<EncodedHtJ2kCodeBlock>, &'static str> {
4212 self.single_blocks = self.single_blocks.saturating_add(1);
4213 Ok(None)
4214 }
4215 }
4216
/// Checks that two prepared precomputed tiles reach the accelerator as exactly
/// one HT code-block batch, that each job is then also offered individually,
/// and that both tiles come back encoded in tile order.
#[test]
fn float97_precomputed_prepared_tiles_offer_all_tiles_to_one_ht_batch() {
    let prepared: Vec<Float97BatchTile> = (0..2).map(test_float97_precomputed_tile).collect();
    let options = {
        let mut lossy = JpegToHtj2kOptions::lossy_97();
        lossy.encode_options.code_block_width_exp = 2;
        lossy.encode_options.code_block_height_exp = 2;
        lossy
    };
    let mut counter = CountingHtBatchEncodeAccelerator::default();

    let encoded_tiles = encode_float97_prepared_tiles(prepared, &options, &mut counter);

    assert_eq!(encoded_tiles.len(), 2);
    encoded_tiles.into_iter().enumerate().for_each(
        |(expected_tile_index, (actual_tile_index, encoded))| {
            assert_eq!(actual_tile_index, expected_tile_index);
            let encoded = encoded.expect("precomputed batch tile encodes");
            // Every codestream must begin with the bytes 0xFF 0x4F.
            assert!(encoded.codestream.starts_with(&[0xff, 0x4f]));
        },
    );
    assert_eq!(counter.batches, 1);
    assert!(counter.jobs > 0);
    assert_eq!(counter.single_blocks, counter.jobs);
}
4242
/// Checks that storing a compact pre-encoded component copies only the bytes
/// its code blocks reference into the tile payload and rewrites each block's
/// `payload_range` relative to that tile payload.
#[test]
fn compact_preencoded_component_storage_rebases_ranges_into_tile_payload() {
    let mut tile = test_float97_tile();
    let batch_payload = vec![1, 2, 3, 4, 5, 6];

    // Two blocks referencing bytes 1..3 and 3..6 of the batch payload.
    let first_block = PreencodedHtj2k97CompactCodeBlock {
        width: 1,
        height: 1,
        payload_range: 1..3,
        cleanup_length: 2,
        refinement_length: 0,
        num_coding_passes: 1,
        num_zero_bitplanes: 0,
    };
    let second_block = PreencodedHtj2k97CompactCodeBlock {
        width: 1,
        height: 1,
        payload_range: 3..6,
        cleanup_length: 3,
        refinement_length: 0,
        num_coding_passes: 1,
        num_zero_bitplanes: 0,
    };
    let subband = PreencodedHtj2k97CompactSubband {
        sub_band_type: crate::accelerator::J2kSubBandType::LowLow,
        num_cbs_x: 2,
        num_cbs_y: 1,
        total_bitplanes: 1,
        code_blocks: vec![first_block, second_block],
    };
    let component = PreencodedHtj2k97CompactComponent {
        x_rsiz: 1,
        y_rsiz: 1,
        resolutions: vec![PreencodedHtj2k97CompactResolution {
            subbands: vec![subband],
        }],
    };

    store_compact_preencoded_component(&mut tile, 1, &batch_payload, component)
        .expect("compact component storage");

    let stored = tile.preencoded_compact_components[1]
        .as_ref()
        .expect("stored compact component");
    assert_eq!(tile.preencoded_compact_payload, vec![2, 3, 4, 5, 6]);

    let stored_blocks = &stored.resolutions[0].subbands[0].code_blocks;
    assert_eq!(stored_blocks[0].payload_range, 0..2);
    assert_eq!(stored_blocks[1].payload_range, 2..5);
}
4296
4297 fn test_float97_tile() -> Float97BatchTile {
4298 let components = vec![
4299 test_component(0, 16, 16, 2, 2),
4300 test_component(1, 8, 8, 1, 1),
4301 test_component(2, 8, 8, 1, 1),
4302 ];
4303 Float97BatchTile {
4304 tile_index: 0,
4305 jpeg: JpegDctImage {
4306 width: 16,
4307 height: 16,
4308 color_space: ColorSpace::YCbCr,
4309 coding_mode: JpegDctCodingMode::BaselineSequential,
4310 scan_count: 1,
4311 components,
4312 restart_index: None,
4313 },
4314 component_sampling: vec![(1, 1), (2, 2), (2, 2)],
4315 decomposition_levels: 1,
4316 all_unit_sampled: false,
4317 component_reports: Vec::new(),
4318 precomputed_components: vec![None, None, None],
4319 preencoded_compact_payload: Vec::new(),
4320 preencoded_compact_components: vec![None, None, None],
4321 preencoded_components: vec![None, None, None],
4322 prequantized_components: vec![None, None, None],
4323 float_validation_actual: Vec::new(),
4324 float_validation_expected: Vec::new(),
4325 timings: TranscodeTimingReport::default(),
4326 }
4327 }
4328
4329 fn test_float97_precomputed_tile(tile_index: usize) -> Float97BatchTile {
4330 let width = 17;
4331 let height = 13;
4332 let component = test_component(0, width, height, 1, 1);
4333 Float97BatchTile {
4334 tile_index,
4335 jpeg: JpegDctImage {
4336 width,
4337 height,
4338 color_space: ColorSpace::Grayscale,
4339 coding_mode: JpegDctCodingMode::BaselineSequential,
4340 scan_count: 1,
4341 components: vec![component],
4342 restart_index: None,
4343 },
4344 component_sampling: vec![(1, 1)],
4345 decomposition_levels: 1,
4346 all_unit_sampled: true,
4347 component_reports: vec![TranscodeComponentReport {
4348 component_index: 0,
4349 width,
4350 height,
4351 block_cols: width.div_ceil(8),
4352 block_rows: height.div_ceil(8),
4353 x_rsiz: 1,
4354 y_rsiz: 1,
4355 }],
4356 precomputed_components: vec![Some(dummy_precomputed_component(1, 1, width, height))],
4357 preencoded_compact_payload: Vec::new(),
4358 preencoded_compact_components: vec![None],
4359 preencoded_components: vec![None],
4360 prequantized_components: vec![None],
4361 float_validation_actual: Vec::new(),
4362 float_validation_expected: Vec::new(),
4363 timings: TranscodeTimingReport::default(),
4364 }
4365 }
4366
4367 fn test_component(
4368 component_index: usize,
4369 width: u32,
4370 height: u32,
4371 h_samp: u8,
4372 v_samp: u8,
4373 ) -> JpegDctComponent {
4374 let block_cols = width.div_ceil(8);
4375 let block_rows = height.div_ceil(8);
4376 let block_count = (block_cols * block_rows) as usize;
4377 JpegDctComponent {
4378 component_index,
4379 width,
4380 height,
4381 h_samp,
4382 v_samp,
4383 block_cols,
4384 block_rows,
4385 quant_table: [1u16; 64],
4386 quantized_blocks: vec![[0i16; 64]; block_count],
4387 dequantized_blocks: vec![[0i16; 64]; block_count],
4388 }
4389 }
4390
4391 fn dummy_precomputed_component(
4392 x_rsiz: u8,
4393 y_rsiz: u8,
4394 width: u32,
4395 height: u32,
4396 ) -> PrecomputedHtj2k97Component {
4397 let low_width = width.div_ceil(2);
4398 let low_height = height.div_ceil(2);
4399 let high_width = width / 2;
4400 let high_height = height / 2;
4401 PrecomputedHtj2k97Component {
4402 x_rsiz,
4403 y_rsiz,
4404 dwt: J2kForwardDwt97Output {
4405 ll: sample_f32_coefficients(low_width * low_height, 0.25),
4406 ll_width: low_width,
4407 ll_height: low_height,
4408 levels: vec![J2kForwardDwt97Level {
4409 hl: sample_f32_coefficients(high_width * low_height, -0.75),
4410 lh: sample_f32_coefficients(low_width * high_height, 1.25),
4411 hh: sample_f32_coefficients(high_width * high_height, -1.5),
4412 width,
4413 height,
4414 low_width,
4415 low_height,
4416 high_width,
4417 high_height,
4418 }],
4419 },
4420 }
4421 }
4422
/// Produces `count` deterministic coefficients: `seed` plus one eighth of the
/// sine of each index.
fn sample_f32_coefficients(count: u32, seed: f32) -> Vec<f32> {
    let mut coefficients = Vec::with_capacity(count as usize);
    for idx in 0..count {
        let ripple = (idx as f32).sin() * 0.125;
        coefficients.push(seed + ripple);
    }
    coefficients
}
4428
4429 fn dummy_preencoded_component(x_rsiz: u8, y_rsiz: u8) -> PreencodedHtj2k97Component {
4430 PreencodedHtj2k97Component {
4431 x_rsiz,
4432 y_rsiz,
4433 resolutions: vec![PreencodedHtj2k97Resolution {
4434 subbands: vec![PreencodedHtj2k97Subband {
4435 sub_band_type: crate::accelerator::J2kSubBandType::LowLow,
4436 num_cbs_x: 1,
4437 num_cbs_y: 1,
4438 total_bitplanes: 1,
4439 code_blocks: vec![PreencodedHtj2k97CodeBlock {
4440 width: 1,
4441 height: 1,
4442 encoded: EncodedHtJ2kCodeBlock {
4443 data: Vec::new(),
4444 cleanup_length: 0,
4445 refinement_length: 0,
4446 num_coding_passes: 0,
4447 num_zero_bitplanes: 1,
4448 },
4449 }],
4450 }],
4451 }],
4452 }
4453 }
4454}