1use core::fmt;
6use std::time::Instant;
7
8use j2k::adapter::encode_stage::{
9 CpuOnlyJ2kEncodeStageAccelerator, IrreversibleQuantizationSubbandScales,
10 J2kEncodeDispatchReport, J2kEncodeStageAccelerator, J2kForwardDwt53Level,
11 J2kForwardDwt53Output, J2kForwardDwt97Level, J2kForwardDwt97Output, NativeEncodeStageAdapter,
12 PrecomputedHtj2k53Component, PrecomputedHtj2k53Image, PrecomputedHtj2k97Component,
13 PrecomputedHtj2k97Image, PreencodedHtj2k97CompactComponent, PreencodedHtj2k97CompactImage,
14 PreencodedHtj2k97Component, PreencodedHtj2k97Image, PrequantizedHtj2k97Component,
15 PrequantizedHtj2k97Image,
16};
17use j2k::J2kProgressionOrder;
18use j2k_jpeg::transcode::{
19 extract_dct_blocks, idct_islow_block, DctExtractOptions, JpegDctComponent, JpegDctImage,
20};
21use j2k_native::{
22 encode_precomputed_htj2k_53_with_accelerator,
23 encode_precomputed_htj2k_97_batch_with_accelerator,
24 encode_precomputed_htj2k_97_with_accelerator,
25 encode_preencoded_htj2k_97_compact_owned_with_accelerator,
26 encode_preencoded_htj2k_97_owned_with_accelerator,
27 encode_prequantized_htj2k_97_with_accelerator,
28};
29use rayon::prelude::*;
30
31use crate::accelerator::{
32 CpuOnlyDctToWaveletStageAccelerator, DctGridI16ToHtj2k97CodeBlockBatch,
33 DctGridI16ToHtj2k97CodeBlockJob, DctGridToDwt53Job, DctGridToDwt97Job,
34 DctGridToHtj2k97CodeBlockJob, DctGridToReversibleDwt53Job, DctToWaveletStageAccelerator,
35 Dwt97BatchStageTimings, Htj2k97CodeBlockOptions, ReversibleDwt53FirstLevel,
36 TranscodeStageError,
37};
38use crate::dct53_2d::{
39 dct8x8_blocks_then_dwt53_float, dct8x8_blocks_to_dwt53_float_linear_with_scratch,
40 linearized_53_2d_from_plane, Dct53GridScratch, Dwt53TwoDimensional,
41};
42use crate::dct97_2d::{
43 dct8x8_blocks_then_dwt97_float, dct8x8_blocks_then_dwt97_float_with_scratch,
44 linearized_97_2d_from_plane_with_scratch, Dct97GridScratch, Dwt97TwoDimensional,
45};
46use crate::metrics::{error_metrics_i32, ErrorMetrics, MetricsLengthError};
47use crate::reversible53::{
48 reversible_lift_53_high_at_fallible, reversible_lift_53_i32, reversible_lift_53_low_at_fallible,
49};
50use crate::DctGridError;
51
/// Irreversible quantization scale installed by [`JpegToHtj2kOptions::lossy_97`].
pub const JPEG_TO_HTJ2K_LOSSY_97_QUANTIZATION_SCALE: f32 = 1.9;
59
60#[derive(Debug, Clone, PartialEq)]
62#[allow(clippy::struct_excessive_bools)]
63pub struct JpegToHtj2kEncodeOptions {
64 pub num_decomposition_levels: u8,
66 pub reversible: bool,
68 pub code_block_width_exp: u8,
70 pub code_block_height_exp: u8,
72 pub guard_bits: u8,
74 pub use_ht_block_coding: bool,
76 pub progression_order: J2kProgressionOrder,
78 pub write_tlm: bool,
80 pub write_plt: bool,
82 pub write_plm: bool,
84 pub write_ppm: bool,
86 pub write_ppt: bool,
88 pub write_sop: bool,
90 pub write_eph: bool,
92 pub use_mct: bool,
94 pub num_layers: u8,
96 pub quality_layer_byte_targets: Vec<u64>,
98 pub validate_high_throughput_codestream: bool,
100 pub irreversible_quantization_scale: f32,
102 pub irreversible_quantization_subband_scales: IrreversibleQuantizationSubbandScales,
104 pub component_sampling: Option<Vec<(u8, u8)>>,
106 pub tile_size: Option<(u32, u32)>,
108 pub tile_part_packet_limit: Option<u16>,
110 pub precinct_exponents: Vec<(u8, u8)>,
112}
113
114impl Default for JpegToHtj2kEncodeOptions {
115 fn default() -> Self {
116 Self {
117 num_decomposition_levels: 5,
118 reversible: true,
119 code_block_width_exp: 4,
120 code_block_height_exp: 4,
121 guard_bits: 1,
122 use_ht_block_coding: false,
123 progression_order: J2kProgressionOrder::Lrcp,
124 write_tlm: false,
125 write_plt: false,
126 write_plm: false,
127 write_ppm: false,
128 write_ppt: false,
129 write_sop: false,
130 write_eph: false,
131 use_mct: true,
132 num_layers: 1,
133 quality_layer_byte_targets: Vec::new(),
134 validate_high_throughput_codestream: true,
135 irreversible_quantization_scale: 1.0,
136 irreversible_quantization_subband_scales:
137 IrreversibleQuantizationSubbandScales::default(),
138 component_sampling: None,
139 tile_size: None,
140 tile_part_packet_limit: None,
141 precinct_exponents: Vec::new(),
142 }
143 }
144}
145
146impl JpegToHtj2kEncodeOptions {
147 fn to_native(&self) -> j2k_native::EncodeOptions {
148 j2k_native::EncodeOptions {
149 num_decomposition_levels: self.num_decomposition_levels,
150 reversible: self.reversible,
151 code_block_width_exp: self.code_block_width_exp,
152 code_block_height_exp: self.code_block_height_exp,
153 guard_bits: self.guard_bits,
154 use_ht_block_coding: self.use_ht_block_coding,
155 progression_order: native_progression_order(self.progression_order),
156 write_tlm: self.write_tlm,
157 write_plt: self.write_plt,
158 write_plm: self.write_plm,
159 write_ppm: self.write_ppm,
160 write_ppt: self.write_ppt,
161 write_sop: self.write_sop,
162 write_eph: self.write_eph,
163 use_mct: self.use_mct,
164 num_layers: self.num_layers,
165 quality_layer_byte_targets: self.quality_layer_byte_targets.clone(),
166 validate_high_throughput_codestream: self.validate_high_throughput_codestream,
167 irreversible_quantization_scale: self.irreversible_quantization_scale,
168 irreversible_quantization_subband_scales: self.irreversible_quantization_subband_scales,
169 component_sampling: self.component_sampling.clone(),
170 tile_size: self.tile_size,
171 tile_part_packet_limit: self.tile_part_packet_limit,
172 precinct_exponents: self.precinct_exponents.clone(),
173 roi_component_shifts: Vec::new(),
174 }
175 }
176}
177
178#[derive(Debug, Clone)]
180pub struct JpegToHtj2kOptions {
181 pub encode_options: JpegToHtj2kEncodeOptions,
183 pub coefficient_path: JpegToHtj2kCoefficientPath,
185 pub validate_against_float_reference: bool,
189 pub validate_against_integer_reference: bool,
194}
195
196impl Default for JpegToHtj2kOptions {
197 fn default() -> Self {
198 Self::lossless_53()
199 }
200}
201
202impl JpegToHtj2kOptions {
203 #[must_use]
205 pub fn lossless_53() -> Self {
206 Self {
207 encode_options: transcode_encode_options(true),
208 coefficient_path: JpegToHtj2kCoefficientPath::IntegerDirect53,
209 validate_against_float_reference: false,
210 validate_against_integer_reference: false,
211 }
212 }
213
214 #[must_use]
216 pub fn lossy_97() -> Self {
217 let mut encode_options = transcode_encode_options(false);
218 encode_options.irreversible_quantization_scale = JPEG_TO_HTJ2K_LOSSY_97_QUANTIZATION_SCALE;
219 Self {
220 encode_options,
221 coefficient_path: JpegToHtj2kCoefficientPath::FloatDirectLinear97,
222 validate_against_float_reference: false,
223 validate_against_integer_reference: false,
224 }
225 }
226}
227
228fn transcode_encode_options(reversible: bool) -> JpegToHtj2kEncodeOptions {
229 JpegToHtj2kEncodeOptions {
230 num_decomposition_levels: 1,
231 reversible,
232 use_ht_block_coding: true,
233 use_mct: false,
234 validate_high_throughput_codestream: false,
235 ..JpegToHtj2kEncodeOptions::default()
236 }
237}
238
/// Selects how JPEG DCT coefficients are turned into wavelet coefficients.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JpegToHtj2kCoefficientPath {
    /// Path used by [`JpegToHtj2kOptions::lossless_53`]; batch transcodes take the
    /// integer batch route for it.
    IntegerDirect53,
    /// Float 5/3 path; batch transcodes process its tiles one at a time.
    FloatDirectLinear53,
    /// Path used by [`JpegToHtj2kOptions::lossy_97`]; batch transcodes use the 9/7
    /// batch route when the transform accelerator reports support for it, and
    /// otherwise process tiles one at a time.
    FloatDirectLinear97,
}
257
258#[derive(Debug, Default)]
264pub struct JpegToHtj2kTranscoder {
265 scratch: JpegToHtj2kScratch,
266}
267
268impl JpegToHtj2kTranscoder {
269 pub fn transcode(
272 &mut self,
273 bytes: &[u8],
274 options: &JpegToHtj2kOptions,
275 ) -> Result<EncodedTranscode, JpegToHtj2kError> {
276 let mut accelerator = CpuOnlyDctToWaveletStageAccelerator;
277 self.transcode_with_accelerator(bytes, options, &mut accelerator)
278 }
279
280 pub fn transcode_with_accelerator<A: DctToWaveletStageAccelerator>(
286 &mut self,
287 bytes: &[u8],
288 options: &JpegToHtj2kOptions,
289 accelerator: &mut A,
290 ) -> Result<EncodedTranscode, JpegToHtj2kError> {
291 let mut encode_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
292 self.transcode_with_accelerators(bytes, options, accelerator, &mut encode_accelerator)
293 }
294
295 pub fn transcode_with_accelerators<
298 A: DctToWaveletStageAccelerator,
299 E: J2kEncodeStageAccelerator,
300 >(
301 &mut self,
302 bytes: &[u8],
303 options: &JpegToHtj2kOptions,
304 transform_accelerator: &mut A,
305 encode_accelerator: &mut E,
306 ) -> Result<EncodedTranscode, JpegToHtj2kError> {
307 jpeg_to_htj2k_with_scratch(
308 bytes,
309 options,
310 &mut self.scratch,
311 transform_accelerator,
312 encode_accelerator,
313 )
314 }
315
316 pub fn transcode_batch(
320 &mut self,
321 tiles: &[JpegTileBatchInput<'_>],
322 options: &JpegToHtj2kOptions,
323 ) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
324 let mut accelerator = CpuOnlyDctToWaveletStageAccelerator;
325 self.transcode_batch_with_accelerator(tiles, options, &mut accelerator)
326 }
327
328 pub fn transcode_batch_with_accelerator<A: DctToWaveletStageAccelerator>(
330 &mut self,
331 tiles: &[JpegTileBatchInput<'_>],
332 options: &JpegToHtj2kOptions,
333 accelerator: &mut A,
334 ) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
335 let mut encode_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
336 self.transcode_batch_with_accelerators(tiles, options, accelerator, &mut encode_accelerator)
337 }
338
339 pub fn transcode_batch_with_accelerators<
342 A: DctToWaveletStageAccelerator,
343 E: J2kEncodeStageAccelerator,
344 >(
345 &mut self,
346 tiles: &[JpegTileBatchInput<'_>],
347 options: &JpegToHtj2kOptions,
348 transform_accelerator: &mut A,
349 encode_accelerator: &mut E,
350 ) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
351 jpeg_tile_batch_to_htj2k_with_scratch(
352 tiles,
353 options,
354 &mut self.scratch,
355 transform_accelerator,
356 encode_accelerator,
357 )
358 }
359
360 #[must_use]
365 pub fn dct_block_scratch_capacity(&self) -> usize {
366 self.scratch.dct_blocks_f64.capacity()
367 }
368
369 #[must_use]
374 pub fn integer_idct_block_scratch_capacity(&self) -> usize {
375 self.scratch.integer_idct_blocks.capacity()
376 }
377}
378
/// Scratch buffers owned by [`JpegToHtj2kTranscoder`] and passed by `&mut` into the
/// transcode routines.
#[derive(Debug, Default)]
struct JpegToHtj2kScratch {
    /// 8×8 `f64` DCT blocks; capacity is exposed via `dct_block_scratch_capacity`.
    dct_blocks_f64: Vec<[[f64; 8]; 8]>,
    /// Scratch for the 5/3 DCT-grid routines.
    dct53_grid: Dct53GridScratch,
    /// Scratch for the 9/7 DCT-grid routines.
    dct97_grid: Dct97GridScratch,
    /// Optional 64-sample integer blocks; capacity is exposed via
    /// `integer_idct_block_scratch_capacity`.
    integer_idct_blocks: Vec<Option<[i32; 64]>>,
    /// Integer row buffer (usage lies outside this excerpt).
    integer_row: Vec<i32>,
}
387
/// Result of transcoding a single JPEG input.
#[derive(Debug, Clone)]
pub struct EncodedTranscode {
    /// Encoded codestream bytes.
    pub codestream: Vec<u8>,
    /// Report describing the transcode that produced `codestream`.
    pub report: TranscodeReport,
}
396
/// One tile of a batch transcode request.
#[derive(Debug, Clone, Copy)]
pub struct JpegTileBatchInput<'a> {
    /// Borrowed JPEG bytes for this tile; passed to DCT block extraction.
    pub bytes: &'a [u8],
}
403
/// Result of a batch transcode.
#[derive(Debug)]
pub struct EncodedTranscodeBatch {
    /// Per-tile outcomes; a failed tile is an `Err` entry rather than a batch error.
    pub tiles: Vec<Result<EncodedTranscode, JpegToHtj2kError>>,
    /// Aggregate report for the whole batch.
    pub report: BatchTranscodeReport,
}
413
/// Aggregate statistics for one batch transcode.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BatchTranscodeReport {
    /// Number of input tiles in the batch.
    pub tile_count: usize,
    /// Number of `Ok` tile results (filled in by `batch_output`).
    pub successful_tiles: usize,
    /// Number of tile results that are not `Ok` (filled in by `batch_output`).
    pub failed_tiles: usize,
    /// Batch job count on the batched routes; summed per-tile `component_count`
    /// on the per-tile route.
    pub transformed_components: usize,
    /// Batch count reported by the integer 5/3 batch transform; `0` on other routes.
    pub reversible_dwt53_batches: usize,
    /// Job count reported by the integer 5/3 batch transform; `0` on other routes.
    pub reversible_dwt53_batch_jobs: usize,
    /// Extraction time in microseconds.
    pub extract_us: u128,
    /// Transform time in microseconds.
    pub transform_us: u128,
    /// Encode time in microseconds.
    pub encode_us: u128,
    /// Detailed timing and counter breakdown.
    pub timings: TranscodeTimingReport,
    /// Coefficient path copied from the options used for the batch.
    pub coefficient_path: JpegToHtj2kCoefficientPath,
}
441
/// Timing and counter breakdown for a transcode.
///
/// Every field is an unsigned accumulator; `add_assign` merges two reports by
/// saturating addition of each field.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct TranscodeTimingReport {
    // Source acquisition and JPEG stage.
    pub source_raw_probe_us: u128,
    pub read_region_decode_us: u128,
    pub compose_pad_us: u128,
    pub generated_jpeg_encode_us: u128,
    pub jpeg_dct_extract_us: u128,
    pub jpeg_dct_repack_us: u128,
    // DCT → wavelet stage.
    pub dct_to_wavelet_total_us: u128,
    pub dct_to_wavelet_accelerator_us: u128,
    pub dct_to_wavelet_cpu_fallback_us: u128,
    pub dwt_decompose_us: u128,
    // 9/7 batch stage breakdown.
    pub dwt97_batch_pack_upload_us: u128,
    pub dwt97_batch_pack_upload_transfers: usize,
    pub dwt97_batch_pack_upload_bytes: u64,
    pub dwt97_batch_resident_dct_handoff_count: usize,
    pub dwt97_batch_idct_row_lift_us: u128,
    pub dwt97_batch_column_lift_us: u128,
    pub dwt97_batch_resident_dwt_handoff_count: usize,
    pub dwt97_batch_quantize_codeblock_us: u128,
    pub dwt97_batch_ht_encode_us: u128,
    pub dwt97_batch_ht_kernel_us: u128,
    pub dwt97_batch_ht_status_readback_us: u128,
    pub dwt97_batch_ht_status_readback_transfers: usize,
    pub dwt97_batch_ht_status_readback_bytes: u64,
    pub dwt97_batch_ht_compact_us: u128,
    pub dwt97_batch_ht_output_readback_us: u128,
    pub dwt97_batch_ht_output_readback_transfers: usize,
    pub dwt97_batch_ht_output_readback_bytes: u64,
    pub dwt97_batch_ht_codeblock_dispatches: usize,
    pub dwt97_batch_readback_us: u128,
    pub dwt97_batch_readback_transfers: usize,
    pub dwt97_batch_readback_bytes: u64,
    // HTJ2K encode stage.
    pub htj2k_encode_us: u128,
    pub htj2k_encode_accelerator_dispatches: usize,
    pub htj2k_encode_ht_code_block_dispatches: usize,
    pub htj2k_encode_packetization_dispatches: usize,
    // Output writing.
    pub dicom_spool_write_us: u128,
    pub dicom_final_write_us: u128,
    // Work counters.
    pub tile_count: usize,
    pub component_count: usize,
    pub batch_count: usize,
    pub batch_jobs: usize,
    pub accelerator_attempts: usize,
    pub accelerator_jobs: usize,
    pub accelerator_dispatches: usize,
    pub accelerator_dispatched_jobs: usize,
    pub cpu_fallback_jobs: usize,
}

impl TranscodeTimingReport {
    /// Adds every field of `other` onto `self`, saturating instead of overflowing.
    fn add_assign(&mut self, other: Self) {
        // The macro destructures `other` without `..`, so the field list below must
        // name every struct field; a newly added field that is not listed fails to
        // compile instead of being silently dropped from the sum.
        macro_rules! accumulate {
            ($($counter:ident),+ $(,)?) => {
                let Self { $($counter),+ } = other;
                $(
                    self.$counter = self.$counter.saturating_add($counter);
                )+
            };
        }

        accumulate!(
            source_raw_probe_us,
            read_region_decode_us,
            compose_pad_us,
            generated_jpeg_encode_us,
            jpeg_dct_extract_us,
            jpeg_dct_repack_us,
            dct_to_wavelet_total_us,
            dct_to_wavelet_accelerator_us,
            dct_to_wavelet_cpu_fallback_us,
            dwt_decompose_us,
            dwt97_batch_pack_upload_us,
            dwt97_batch_pack_upload_transfers,
            dwt97_batch_pack_upload_bytes,
            dwt97_batch_resident_dct_handoff_count,
            dwt97_batch_idct_row_lift_us,
            dwt97_batch_column_lift_us,
            dwt97_batch_resident_dwt_handoff_count,
            dwt97_batch_quantize_codeblock_us,
            dwt97_batch_ht_encode_us,
            dwt97_batch_ht_kernel_us,
            dwt97_batch_ht_status_readback_us,
            dwt97_batch_ht_status_readback_transfers,
            dwt97_batch_ht_status_readback_bytes,
            dwt97_batch_ht_compact_us,
            dwt97_batch_ht_output_readback_us,
            dwt97_batch_ht_output_readback_transfers,
            dwt97_batch_ht_output_readback_bytes,
            dwt97_batch_ht_codeblock_dispatches,
            dwt97_batch_readback_us,
            dwt97_batch_readback_transfers,
            dwt97_batch_readback_bytes,
            htj2k_encode_us,
            htj2k_encode_accelerator_dispatches,
            htj2k_encode_ht_code_block_dispatches,
            htj2k_encode_packetization_dispatches,
            dicom_spool_write_us,
            dicom_final_write_us,
            tile_count,
            component_count,
            batch_count,
            batch_jobs,
            accelerator_attempts,
            accelerator_jobs,
            accelerator_dispatches,
            accelerator_dispatched_jobs,
            cpu_fallback_jobs,
        );
    }
}
604
/// Geometry of one JPEG component as recorded in a transcode report.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscodeComponentReport {
    /// Component index copied from the extracted JPEG component.
    pub component_index: usize,
    /// Component width copied from the extracted JPEG component.
    pub width: u32,
    /// Component height copied from the extracted JPEG component.
    pub height: u32,
    /// Block column count copied from the extracted JPEG component.
    pub block_cols: u32,
    /// Block row count copied from the extracted JPEG component.
    pub block_rows: u32,
    /// First element of the component's sampling pair.
    pub x_rsiz: u8,
    /// Second element of the component's sampling pair.
    pub y_rsiz: u8,
}
623
/// Validation metrics attached to transcode reports; an alias for [`ErrorMetrics`].
pub type TranscodeValidationMetrics = ErrorMetrics;
626
627#[derive(Debug, Clone, Copy, PartialEq, Eq)]
629pub enum TranscodeValidationClassification {
630 Exact,
632 OneLsbBounded,
636 OutsideThreshold,
638}
639
640impl TranscodeValidationClassification {
641 #[must_use]
644 pub fn classify_metrics(metrics: &TranscodeValidationMetrics) -> Self {
645 if metrics.exact_matches == metrics.total && metrics.max_abs_error == 0 {
646 Self::Exact
647 } else if metrics.is_one_lsb_bounded(0.999) {
648 Self::OneLsbBounded
649 } else {
650 Self::OutsideThreshold
651 }
652 }
653}
654
/// Report describing a single-tile transcode.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TranscodeReport {
    /// Image width.
    pub width: u32,
    /// Image height.
    pub height: u32,
    /// Number of components.
    pub component_count: usize,
    /// Per-component geometry.
    pub components: Vec<TranscodeComponentReport>,
    /// Metrics against the float reference, when present.
    pub float_reference_metrics: Option<TranscodeValidationMetrics>,
    /// Classification of the float reference metrics, when present.
    pub float_reference_classification: Option<TranscodeValidationClassification>,
    /// Metrics against the integer reference, when present.
    pub integer_reference_metrics: Option<TranscodeValidationMetrics>,
    /// Classification of the integer reference metrics, when present.
    pub integer_reference_classification: Option<TranscodeValidationClassification>,
    /// Decomposition level count recorded for the transcode.
    pub decomposition_levels: u8,
    /// Coefficient path recorded for the transcode.
    pub coefficient_path: JpegToHtj2kCoefficientPath,
    /// Static label for the path taken (values are assigned outside this excerpt).
    pub path: &'static str,
    /// Extraction time; presumably microseconds like the batch report — confirm.
    pub extract_us: u128,
    /// Transform time; presumably microseconds like the batch report — confirm.
    pub transform_us: u128,
    /// Encode time; presumably microseconds like the batch report — confirm.
    pub encode_us: u128,
    /// Detailed timing and counter breakdown; summed across tiles by
    /// `aggregate_tile_timings`.
    pub timings: TranscodeTimingReport,
}
691
692#[derive(Debug)]
694pub enum JpegToHtj2kError {
695 Jpeg(j2k_jpeg::JpegError),
697 Unsupported(&'static str),
699 Grid(String),
701 Grid97(String),
704 Accelerator(TranscodeStageError),
706 Metrics(String),
708 Validation(&'static str),
710 Encode(&'static str),
712}
713
714impl fmt::Display for JpegToHtj2kError {
715 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
716 match self {
717 Self::Jpeg(err) => write!(f, "JPEG extraction failed: {err}"),
718 Self::Unsupported(reason) => write!(f, "unsupported transcode input: {reason}"),
719 Self::Grid(reason) | Self::Grid97(reason) => {
720 write!(f, "DCT grid transform failed: {reason}")
721 }
722 Self::Accelerator(reason) => write!(f, "transform accelerator failed: {reason}"),
723 Self::Metrics(reason) => write!(f, "validation metrics failed: {reason}"),
724 Self::Validation(reason) => write!(f, "validation failed: {reason}"),
725 Self::Encode(reason) => write!(f, "HTJ2K encode failed: {reason}"),
726 }
727 }
728}
729
730impl std::error::Error for JpegToHtj2kError {
731 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
732 match self {
733 Self::Jpeg(err) => Some(err),
734 Self::Unsupported(_)
735 | Self::Grid(_)
736 | Self::Grid97(_)
737 | Self::Accelerator(_)
738 | Self::Metrics(_)
739 | Self::Validation(_)
740 | Self::Encode(_) => None,
741 }
742 }
743}
744
745impl From<j2k_jpeg::JpegError> for JpegToHtj2kError {
746 fn from(value: j2k_jpeg::JpegError) -> Self {
747 Self::Jpeg(value)
748 }
749}
750
751fn dct53_grid_error(value: DctGridError) -> JpegToHtj2kError {
752 JpegToHtj2kError::Grid(value.to_string())
753}
754
755fn dct97_grid_error(value: DctGridError) -> JpegToHtj2kError {
756 JpegToHtj2kError::Grid97(value.to_string())
757}
758
759impl From<MetricsLengthError> for JpegToHtj2kError {
760 fn from(value: MetricsLengthError) -> Self {
761 Self::Metrics(value.to_string())
762 }
763}
764
765pub fn jpeg_to_htj2k(
772 bytes: &[u8],
773 options: &JpegToHtj2kOptions,
774) -> Result<EncodedTranscode, JpegToHtj2kError> {
775 JpegToHtj2kTranscoder::default().transcode(bytes, options)
776}
777
778pub fn jpeg_to_htj2k_batch(
780 tiles: &[JpegTileBatchInput<'_>],
781 options: &JpegToHtj2kOptions,
782) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
783 JpegToHtj2kTranscoder::default().transcode_batch(tiles, options)
784}
785
786fn jpeg_tile_batch_to_htj2k_with_scratch<
787 A: DctToWaveletStageAccelerator,
788 E: J2kEncodeStageAccelerator,
789>(
790 tiles: &[JpegTileBatchInput<'_>],
791 options: &JpegToHtj2kOptions,
792 scratch: &mut JpegToHtj2kScratch,
793 accelerator: &mut A,
794 encode_accelerator: &mut E,
795) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
796 validate_transcode_options(options)?;
797 match options.coefficient_path {
798 JpegToHtj2kCoefficientPath::IntegerDirect53 => {}
799 JpegToHtj2kCoefficientPath::FloatDirectLinear97
800 if accelerator.supports_dwt97_batch()
801 || accelerator.supports_htj2k97_codeblock_batch() =>
802 {
803 return jpeg_float97_tile_batch_to_htj2k_with_scratch(
804 tiles,
805 options,
806 scratch,
807 accelerator,
808 encode_accelerator,
809 );
810 }
811 JpegToHtj2kCoefficientPath::FloatDirectLinear53
812 | JpegToHtj2kCoefficientPath::FloatDirectLinear97 => {
813 return Ok(transcode_tile_batch_individually(
814 tiles,
815 options,
816 scratch,
817 accelerator,
818 encode_accelerator,
819 ));
820 }
821 }
822
823 let extract_start = Instant::now();
824 let prepared_results = tiles
825 .par_iter()
826 .enumerate()
827 .map(|(tile_index, tile)| {
828 (
829 tile_index,
830 prepare_integer_batch_tile(tile_index, tile.bytes, options),
831 )
832 })
833 .collect::<Vec<_>>();
834 let extract_us = extract_start.elapsed().as_micros();
835 let mut tile_results: Vec<Option<Result<EncodedTranscode, JpegToHtj2kError>>> =
836 (0..tiles.len()).map(|_| None).collect();
837 let mut prepared_tiles = Vec::new();
838 for (tile_index, result) in prepared_results {
839 match result {
840 Ok(prepared) => prepared_tiles.push(prepared),
841 Err(error) => tile_results[tile_index] = Some(Err(error)),
842 }
843 }
844
845 let transform_start = Instant::now();
846 let mut timings = TranscodeTimingReport::default();
847 let (reversible_dwt53_batches, reversible_dwt53_batch_jobs) = transform_integer_batch_tiles(
848 &mut prepared_tiles,
849 options,
850 scratch,
851 accelerator,
852 &mut timings,
853 )?;
854 let transform_us = transform_start.elapsed().as_micros();
855 timings.jpeg_dct_extract_us = extract_us;
856 timings.dct_to_wavelet_total_us = transform_us;
857 timings.tile_count = prepared_tiles.len();
858
859 let encode_start = Instant::now();
860 let encoded_tiles = encode_integer_prepared_tiles(prepared_tiles, options, encode_accelerator);
861 for (tile_index, encoded) in encoded_tiles {
862 add_encode_timing_counters_from_result(&mut timings, &encoded);
863 tile_results[tile_index] = Some(encoded);
864 }
865 let encode_us = encode_start.elapsed().as_micros();
866 timings.htj2k_encode_us = encode_us;
867
868 let output_tiles = tile_results
869 .into_iter()
870 .map(|tile| {
871 tile.unwrap_or(Err(JpegToHtj2kError::Validation(
872 "batch transcode did not produce a tile result",
873 )))
874 })
875 .collect::<Vec<_>>();
876 Ok(batch_output(
877 output_tiles,
878 BatchTranscodeReport {
879 tile_count: tiles.len(),
880 successful_tiles: 0,
881 failed_tiles: 0,
882 transformed_components: reversible_dwt53_batch_jobs,
883 reversible_dwt53_batches,
884 reversible_dwt53_batch_jobs,
885 extract_us,
886 transform_us,
887 encode_us,
888 timings,
889 coefficient_path: options.coefficient_path,
890 },
891 ))
892}
893
894fn jpeg_float97_tile_batch_to_htj2k_with_scratch<
895 A: DctToWaveletStageAccelerator,
896 E: J2kEncodeStageAccelerator,
897>(
898 tiles: &[JpegTileBatchInput<'_>],
899 options: &JpegToHtj2kOptions,
900 scratch: &mut JpegToHtj2kScratch,
901 accelerator: &mut A,
902 encode_accelerator: &mut E,
903) -> Result<EncodedTranscodeBatch, JpegToHtj2kError> {
904 let extract_start = Instant::now();
905 let prepared_results = tiles
906 .par_iter()
907 .enumerate()
908 .map(|(tile_index, tile)| {
909 (
910 tile_index,
911 prepare_float97_batch_tile(tile_index, tile.bytes, options),
912 )
913 })
914 .collect::<Vec<_>>();
915 let extract_us = extract_start.elapsed().as_micros();
916 let mut tile_results: Vec<Option<Result<EncodedTranscode, JpegToHtj2kError>>> =
917 (0..tiles.len()).map(|_| None).collect();
918 let mut prepared_tiles = Vec::new();
919 for (tile_index, result) in prepared_results {
920 match result {
921 Ok(prepared) => prepared_tiles.push(prepared),
922 Err(error) => tile_results[tile_index] = Some(Err(error)),
923 }
924 }
925
926 let transform_start = Instant::now();
927 let mut timings = TranscodeTimingReport::default();
928 let (_dwt97_batches, dwt97_batch_jobs) = transform_float97_batch_tiles(
929 &mut prepared_tiles,
930 options,
931 scratch,
932 accelerator,
933 &mut timings,
934 )?;
935 let transform_us = transform_start.elapsed().as_micros();
936 timings.jpeg_dct_extract_us = extract_us;
937 timings.dct_to_wavelet_total_us = transform_us;
938 timings.tile_count = prepared_tiles.len();
939
940 let encode_start = Instant::now();
941 let encoded_tiles = encode_float97_prepared_tiles(prepared_tiles, options, encode_accelerator);
942 for (tile_index, encoded) in encoded_tiles {
943 add_encode_timing_counters_from_result(&mut timings, &encoded);
944 tile_results[tile_index] = Some(encoded);
945 }
946 let encode_us = encode_start.elapsed().as_micros();
947 timings.htj2k_encode_us = encode_us;
948
949 let output_tiles = tile_results
950 .into_iter()
951 .map(|tile| {
952 tile.unwrap_or(Err(JpegToHtj2kError::Validation(
953 "9/7 batch transcode did not produce a tile result",
954 )))
955 })
956 .collect::<Vec<_>>();
957 Ok(batch_output(
958 output_tiles,
959 BatchTranscodeReport {
960 tile_count: tiles.len(),
961 successful_tiles: 0,
962 failed_tiles: 0,
963 transformed_components: dwt97_batch_jobs,
964 reversible_dwt53_batches: 0,
965 reversible_dwt53_batch_jobs: 0,
966 extract_us,
967 transform_us,
968 encode_us,
969 timings,
970 coefficient_path: options.coefficient_path,
971 },
972 ))
973}
974
975fn transcode_tile_batch_individually<
976 A: DctToWaveletStageAccelerator,
977 E: J2kEncodeStageAccelerator,
978>(
979 tiles: &[JpegTileBatchInput<'_>],
980 options: &JpegToHtj2kOptions,
981 scratch: &mut JpegToHtj2kScratch,
982 accelerator: &mut A,
983 encode_accelerator: &mut E,
984) -> EncodedTranscodeBatch {
985 let start = Instant::now();
986 let output_tiles = tiles
987 .iter()
988 .map(|tile| {
989 jpeg_to_htj2k_with_scratch(
990 tile.bytes,
991 options,
992 scratch,
993 accelerator,
994 encode_accelerator,
995 )
996 })
997 .collect::<Vec<_>>();
998 let mut timings = aggregate_tile_timings(&output_tiles);
999 timings.tile_count = output_tiles.iter().filter(|tile| tile.is_ok()).count();
1000 let elapsed_us = start.elapsed().as_micros();
1001 if timings.dct_to_wavelet_total_us == 0 {
1002 timings.dct_to_wavelet_total_us = elapsed_us
1003 .saturating_sub(timings.jpeg_dct_extract_us)
1004 .saturating_sub(timings.htj2k_encode_us);
1005 }
1006 batch_output(
1007 output_tiles,
1008 BatchTranscodeReport {
1009 tile_count: tiles.len(),
1010 successful_tiles: 0,
1011 failed_tiles: 0,
1012 transformed_components: timings.component_count,
1013 reversible_dwt53_batches: 0,
1014 reversible_dwt53_batch_jobs: 0,
1015 extract_us: timings.jpeg_dct_extract_us,
1016 transform_us: timings.dct_to_wavelet_total_us,
1017 encode_us: timings.htj2k_encode_us,
1018 timings,
1019 coefficient_path: options.coefficient_path,
1020 },
1021 )
1022}
1023
1024fn aggregate_tile_timings(
1025 tiles: &[Result<EncodedTranscode, JpegToHtj2kError>],
1026) -> TranscodeTimingReport {
1027 let mut timings = TranscodeTimingReport::default();
1028 for tile in tiles.iter().filter_map(|tile| tile.as_ref().ok()) {
1029 timings.add_assign(tile.report.timings);
1030 }
1031 timings
1032}
1033
1034fn batch_output(
1035 tiles: Vec<Result<EncodedTranscode, JpegToHtj2kError>>,
1036 mut report: BatchTranscodeReport,
1037) -> EncodedTranscodeBatch {
1038 report.successful_tiles = tiles.iter().filter(|tile| tile.is_ok()).count();
1039 report.failed_tiles = tiles.len().saturating_sub(report.successful_tiles);
1040 EncodedTranscodeBatch { tiles, report }
1041}
1042
/// Per-tile working state for the integer 5/3 batch pipeline, created by
/// `prepare_integer_batch_tile`.
struct IntegerBatchTile {
    /// Position of the tile in the input batch.
    tile_index: usize,
    /// Extracted JPEG DCT image.
    jpeg: JpegDctImage,
    /// Sampling pair per component.
    component_sampling: Vec<(u8, u8)>,
    /// Decomposition level count resolved for this tile.
    decomposition_levels: u8,
    /// `true` when every sampling pair is `(1, 1)`.
    all_unit_sampled: bool,
    /// Geometry report per component.
    component_reports: Vec<TranscodeComponentReport>,
    /// One slot per component, initially `None`.
    precomputed_components: Vec<Option<PrecomputedHtj2k53Component>>,
    /// Float validation samples (starts empty).
    float_validation_actual: Vec<i32>,
    /// Float validation reference samples (starts empty).
    float_validation_expected: Vec<i32>,
    /// Integer validation samples (starts empty).
    integer_validation_actual: Vec<i32>,
    /// Integer validation reference samples (starts empty).
    integer_validation_expected: Vec<i32>,
    /// Timings for this tile; starts with the extraction time and `tile_count = 1`.
    timings: TranscodeTimingReport,
}
1057
/// Per-tile working state for the 9/7 batch pipeline, created by
/// `prepare_float97_batch_tile`.
struct Float97BatchTile {
    /// Position of the tile in the input batch.
    tile_index: usize,
    /// Extracted JPEG DCT image.
    jpeg: JpegDctImage,
    /// Sampling pair per component.
    component_sampling: Vec<(u8, u8)>,
    /// Decomposition level count resolved for this tile.
    decomposition_levels: u8,
    /// `true` when every sampling pair is `(1, 1)`.
    all_unit_sampled: bool,
    /// Geometry report per component.
    component_reports: Vec<TranscodeComponentReport>,
    /// One slot per component, initially `None`.
    precomputed_components: Vec<Option<PrecomputedHtj2k97Component>>,
    /// Compact pre-encoded payload bytes (starts empty).
    preencoded_compact_payload: Vec<u8>,
    /// One slot per component, initially `None`.
    preencoded_compact_components: Vec<Option<PreencodedHtj2k97CompactComponent>>,
    /// One slot per component, initially `None`.
    preencoded_components: Vec<Option<PreencodedHtj2k97Component>>,
    /// One slot per component, initially `None`.
    prequantized_components: Vec<Option<PrequantizedHtj2k97Component>>,
    /// Float validation samples (starts empty).
    float_validation_actual: Vec<i32>,
    /// Float validation reference samples (starts empty).
    float_validation_expected: Vec<i32>,
    /// Timings for this tile; starts with the extraction time and `tile_count = 1`.
    timings: TranscodeTimingReport,
}
1074
/// Record holding the tile metadata fields of `Float97BatchTile` without its
/// component payload slots.
// NOTE(review): not used within this excerpt; presumably carries tile metadata
// alongside precomputed 9/7 images during encoding — confirm against its users.
struct Float97PrecomputedBatchRecord {
    /// Position of the tile in the input batch.
    tile_index: usize,
    /// Extracted JPEG DCT image.
    jpeg: JpegDctImage,
    /// Decomposition level count for the tile.
    decomposition_levels: u8,
    /// Whether every component is unit sampled.
    all_unit_sampled: bool,
    /// Geometry report per component.
    component_reports: Vec<TranscodeComponentReport>,
    /// Float validation samples.
    float_validation_actual: Vec<i32>,
    /// Float validation reference samples.
    float_validation_expected: Vec<i32>,
    /// Timings for the tile.
    timings: TranscodeTimingReport,
}
1085
/// Index pair naming one component of one tile inside a batch group.
#[derive(Clone, Copy)]
struct BatchComponentRef {
    /// Tile index (whether it refers to the input batch or the prepared-tile slice
    /// is decided by code outside this excerpt).
    tile_index: usize,
    /// Component index within that tile.
    component_index: usize,
}
1091
1092fn prepare_integer_batch_tile(
1093 tile_index: usize,
1094 bytes: &[u8],
1095 options: &JpegToHtj2kOptions,
1096) -> Result<IntegerBatchTile, JpegToHtj2kError> {
1097 let extract_start = Instant::now();
1098 let jpeg = extract_dct_blocks(bytes, DctExtractOptions::default())?;
1099 let timings = TranscodeTimingReport {
1100 jpeg_dct_extract_us: extract_start.elapsed().as_micros(),
1101 tile_count: 1,
1102 ..TranscodeTimingReport::default()
1103 };
1104 if jpeg.components.is_empty() || jpeg.components.len() > 4 {
1105 return Err(JpegToHtj2kError::Unsupported(
1106 "unsupported JPEG component count for jpeg_to_htj2k",
1107 ));
1108 }
1109 let component_sampling =
1110 component_sampling_for_jpeg(&jpeg.components, jpeg.width, jpeg.height)?;
1111 let decomposition_levels = decomposition_levels_for_components(
1112 &jpeg.components,
1113 options.encode_options.num_decomposition_levels,
1114 )?;
1115 let all_unit_sampled = component_sampling
1116 .iter()
1117 .all(|&(x_rsiz, y_rsiz)| x_rsiz == 1 && y_rsiz == 1);
1118 let component_reports = jpeg
1119 .components
1120 .iter()
1121 .zip(component_sampling.iter().copied())
1122 .map(|(component, (x_rsiz, y_rsiz))| TranscodeComponentReport {
1123 component_index: component.component_index,
1124 width: component.width,
1125 height: component.height,
1126 block_cols: component.block_cols,
1127 block_rows: component.block_rows,
1128 x_rsiz,
1129 y_rsiz,
1130 })
1131 .collect::<Vec<_>>();
1132 let precomputed_components = (0..jpeg.components.len()).map(|_| None).collect();
1133
1134 Ok(IntegerBatchTile {
1135 tile_index,
1136 jpeg,
1137 component_sampling,
1138 decomposition_levels,
1139 all_unit_sampled,
1140 component_reports,
1141 precomputed_components,
1142 float_validation_actual: Vec::new(),
1143 float_validation_expected: Vec::new(),
1144 integer_validation_actual: Vec::new(),
1145 integer_validation_expected: Vec::new(),
1146 timings,
1147 })
1148}
1149
1150fn prepare_float97_batch_tile(
1151 tile_index: usize,
1152 bytes: &[u8],
1153 options: &JpegToHtj2kOptions,
1154) -> Result<Float97BatchTile, JpegToHtj2kError> {
1155 let extract_start = Instant::now();
1156 let jpeg = extract_dct_blocks(bytes, DctExtractOptions::dequantized_only())?;
1157 let timings = TranscodeTimingReport {
1158 jpeg_dct_extract_us: extract_start.elapsed().as_micros(),
1159 tile_count: 1,
1160 ..TranscodeTimingReport::default()
1161 };
1162 if jpeg.components.is_empty() || jpeg.components.len() > 4 {
1163 return Err(JpegToHtj2kError::Unsupported(
1164 "unsupported JPEG component count for jpeg_to_htj2k",
1165 ));
1166 }
1167 let component_sampling =
1168 component_sampling_for_jpeg(&jpeg.components, jpeg.width, jpeg.height)?;
1169 let decomposition_levels = decomposition_levels_for_components(
1170 &jpeg.components,
1171 options.encode_options.num_decomposition_levels,
1172 )?;
1173 let all_unit_sampled = component_sampling
1174 .iter()
1175 .all(|&(x_rsiz, y_rsiz)| x_rsiz == 1 && y_rsiz == 1);
1176 let component_reports = jpeg
1177 .components
1178 .iter()
1179 .zip(component_sampling.iter().copied())
1180 .map(|(component, (x_rsiz, y_rsiz))| TranscodeComponentReport {
1181 component_index: component.component_index,
1182 width: component.width,
1183 height: component.height,
1184 block_cols: component.block_cols,
1185 block_rows: component.block_rows,
1186 x_rsiz,
1187 y_rsiz,
1188 })
1189 .collect::<Vec<_>>();
1190 let precomputed_components = (0..jpeg.components.len()).map(|_| None).collect();
1191 let preencoded_compact_components = (0..jpeg.components.len()).map(|_| None).collect();
1192 let preencoded_components = (0..jpeg.components.len()).map(|_| None).collect();
1193 let prequantized_components = (0..jpeg.components.len()).map(|_| None).collect();
1194
1195 Ok(Float97BatchTile {
1196 tile_index,
1197 jpeg,
1198 component_sampling,
1199 decomposition_levels,
1200 all_unit_sampled,
1201 component_reports,
1202 precomputed_components,
1203 preencoded_compact_payload: Vec::new(),
1204 preencoded_compact_components,
1205 preencoded_components,
1206 prequantized_components,
1207 float_validation_actual: Vec::new(),
1208 float_validation_expected: Vec::new(),
1209 timings,
1210 })
1211}
1212
1213fn transform_integer_batch_tiles<A: DctToWaveletStageAccelerator>(
1214 tiles: &mut [IntegerBatchTile],
1215 options: &JpegToHtj2kOptions,
1216 scratch: &mut JpegToHtj2kScratch,
1217 accelerator: &mut A,
1218 timings: &mut TranscodeTimingReport,
1219) -> Result<(usize, usize), JpegToHtj2kError> {
1220 let groups = batch_component_groups(tiles);
1221 let mut batch_count = 0usize;
1222 let mut job_count = 0usize;
1223
1224 for group in groups {
1225 batch_count = batch_count.saturating_add(1);
1226 job_count = job_count.saturating_add(group.len());
1227 let wavelets =
1228 integer_wavelets_for_batch_group(&group, tiles, scratch, accelerator, timings)?;
1229 for (component_ref, wavelet) in group.into_iter().zip(wavelets) {
1230 store_integer_batch_wavelet(component_ref, &wavelet, tiles, options, scratch)?;
1231 }
1232 }
1233
1234 Ok((batch_count, job_count))
1235}
1236
1237fn transform_float97_batch_tiles<A: DctToWaveletStageAccelerator>(
1238 tiles: &mut [Float97BatchTile],
1239 options: &JpegToHtj2kOptions,
1240 scratch: &mut JpegToHtj2kScratch,
1241 accelerator: &mut A,
1242 timings: &mut TranscodeTimingReport,
1243) -> Result<(usize, usize), JpegToHtj2kError> {
1244 let groups = float97_batch_component_groups(tiles);
1245 let grouped_i16_preencoded = try_store_grouped_i16_preencoded_float97_batches(
1246 &groups,
1247 tiles,
1248 options,
1249 accelerator,
1250 timings,
1251 )?;
1252 let mut batch_count = 0usize;
1253 let mut job_count = 0usize;
1254
1255 for (group_index, group) in groups.into_iter().enumerate() {
1256 batch_count = batch_count.saturating_add(1);
1257 job_count = job_count.saturating_add(group.len());
1258 if grouped_i16_preencoded
1259 .get(group_index)
1260 .copied()
1261 .unwrap_or(false)
1262 {
1263 continue;
1264 }
1265 if try_store_prequantized_float97_batch_group(&group, tiles, options, accelerator, timings)?
1266 {
1267 continue;
1268 }
1269 let wavelets =
1270 float97_wavelets_for_batch_group(&group, tiles, scratch, accelerator, timings)?;
1271 for (component_ref, wavelet) in group.into_iter().zip(wavelets) {
1272 store_float97_batch_wavelet(component_ref, &wavelet, tiles, options, scratch)?;
1273 }
1274 }
1275
1276 Ok((batch_count, job_count))
1277}
1278
1279fn batch_component_groups(tiles: &[IntegerBatchTile]) -> Vec<Vec<BatchComponentRef>> {
1280 let mut groups: Vec<Vec<BatchComponentRef>> = Vec::new();
1281
1282 for (tile_index, tile) in tiles.iter().enumerate() {
1283 for (component_index, component) in tile.jpeg.components.iter().enumerate() {
1284 let component_ref = BatchComponentRef {
1285 tile_index,
1286 component_index,
1287 };
1288 if let Some(group) = groups.iter_mut().find(|group| {
1289 let first = group[0];
1290 same_batch_component_key(
1291 &tiles[first.tile_index],
1292 first.component_index,
1293 tile,
1294 component_index,
1295 )
1296 }) {
1297 group.push(component_ref);
1298 } else {
1299 let _ = component;
1300 groups.push(vec![component_ref]);
1301 }
1302 }
1303 }
1304
1305 groups
1306}
1307
1308fn float97_batch_component_groups(tiles: &[Float97BatchTile]) -> Vec<Vec<BatchComponentRef>> {
1309 let mut groups: Vec<Vec<BatchComponentRef>> = Vec::new();
1310
1311 for (tile_index, tile) in tiles.iter().enumerate() {
1312 for component_index in 0..tile.jpeg.components.len() {
1313 let component_ref = BatchComponentRef {
1314 tile_index,
1315 component_index,
1316 };
1317 if let Some(group) = groups.iter_mut().find(|group| {
1318 let first = group[0];
1319 same_float97_batch_component_key(
1320 &tiles[first.tile_index],
1321 first.component_index,
1322 tile,
1323 component_index,
1324 )
1325 }) {
1326 group.push(component_ref);
1327 } else {
1328 groups.push(vec![component_ref]);
1329 }
1330 }
1331 }
1332
1333 groups
1334}
1335
1336fn same_batch_component_key(
1337 left_tile: &IntegerBatchTile,
1338 left_component_index: usize,
1339 right_tile: &IntegerBatchTile,
1340 right_component_index: usize,
1341) -> bool {
1342 let left = &left_tile.jpeg.components[left_component_index];
1343 let right = &right_tile.jpeg.components[right_component_index];
1344 left.component_index == right.component_index
1345 && left.width == right.width
1346 && left.height == right.height
1347 && left.block_cols == right.block_cols
1348 && left.block_rows == right.block_rows
1349 && left_tile.component_sampling[left_component_index]
1350 == right_tile.component_sampling[right_component_index]
1351}
1352
1353fn same_float97_batch_component_key(
1354 left_tile: &Float97BatchTile,
1355 left_component_index: usize,
1356 right_tile: &Float97BatchTile,
1357 right_component_index: usize,
1358) -> bool {
1359 let left = &left_tile.jpeg.components[left_component_index];
1360 let right = &right_tile.jpeg.components[right_component_index];
1361 left.width == right.width
1362 && left.height == right.height
1363 && left.block_cols == right.block_cols
1364 && left.block_rows == right.block_rows
1365 && left_tile.component_sampling[left_component_index]
1366 == right_tile.component_sampling[right_component_index]
1367}
1368
1369fn integer_wavelets_for_batch_group<A: DctToWaveletStageAccelerator>(
1370 group: &[BatchComponentRef],
1371 tiles: &[IntegerBatchTile],
1372 scratch: &mut JpegToHtj2kScratch,
1373 accelerator: &mut A,
1374 timings: &mut TranscodeTimingReport,
1375) -> Result<Vec<IntegerWavelet>, JpegToHtj2kError> {
1376 let jobs = group
1377 .iter()
1378 .map(|component_ref| {
1379 integer_dct_job_for_component(
1380 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index],
1381 )
1382 })
1383 .collect::<Result<Vec<_>, _>>()?;
1384 record_batch_attempt(timings, group.len());
1385 let accelerator_start = Instant::now();
1386 let accelerated = accelerator
1387 .dct_grid_to_reversible_dwt53_batch(&jobs)
1388 .map_err(JpegToHtj2kError::Accelerator)?;
1389 timings.dct_to_wavelet_accelerator_us = timings
1390 .dct_to_wavelet_accelerator_us
1391 .saturating_add(accelerator_start.elapsed().as_micros());
1392
1393 if let Some(first_levels) = accelerated {
1394 if first_levels.len() != group.len() {
1395 return Err(JpegToHtj2kError::Validation(
1396 "reversible 5/3 batch accelerator returned wrong component count",
1397 ));
1398 }
1399 timings.component_count = timings.component_count.saturating_add(group.len());
1400 record_accelerator_dispatch(timings, group.len());
1401 let decompose_start = Instant::now();
1402 let wavelets = first_levels
1403 .into_iter()
1404 .zip(group.iter().copied())
1405 .map(|(first_level, component_ref)| {
1406 integer_wavelet_from_first_level(
1407 first_level,
1408 tiles[component_ref.tile_index].decomposition_levels,
1409 )
1410 })
1411 .collect();
1412 timings.dwt_decompose_us = timings
1413 .dwt_decompose_us
1414 .saturating_add(decompose_start.elapsed().as_micros());
1415 return Ok(wavelets);
1416 }
1417
1418 group
1419 .iter()
1420 .map(|component_ref| {
1421 integer_direct_wavelet_from_component(
1422 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index],
1423 tiles[component_ref.tile_index].decomposition_levels,
1424 scratch,
1425 accelerator,
1426 timings,
1427 )
1428 })
1429 .collect()
1430}
1431
1432fn i16_htj2k97_jobs_for_batch_group<'a>(
1433 group: &[BatchComponentRef],
1434 tiles: &'a [Float97BatchTile],
1435) -> Result<Vec<DctGridI16ToHtj2k97CodeBlockJob<'a>>, JpegToHtj2kError> {
1436 group
1437 .iter()
1438 .map(|component_ref| {
1439 let tile = &tiles[component_ref.tile_index];
1440 let component = &tile.jpeg.components[component_ref.component_index];
1441 let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
1442 validate_component_block_grid(component)?;
1443 Ok(DctGridI16ToHtj2k97CodeBlockJob {
1444 dequantized_blocks: &component.dequantized_blocks,
1445 block_cols: component.block_cols as usize,
1446 block_rows: component.block_rows as usize,
1447 width: component.width as usize,
1448 height: component.height as usize,
1449 x_rsiz,
1450 y_rsiz,
1451 })
1452 })
1453 .collect()
1454}
1455
1456fn store_compact_preencoded_component(
1457 tile: &mut Float97BatchTile,
1458 component_index: usize,
1459 batch_payload: &[u8],
1460 mut component: PreencodedHtj2k97CompactComponent,
1461) -> Result<(), JpegToHtj2kError> {
1462 if component_index >= tile.preencoded_compact_components.len() {
1463 return Err(JpegToHtj2kError::Validation(
1464 "compact preencoded component index out of range",
1465 ));
1466 }
1467
1468 for resolution in &mut component.resolutions {
1469 for subband in &mut resolution.subbands {
1470 for block in &mut subband.code_blocks {
1471 if block.payload_range.start > block.payload_range.end
1472 || block.payload_range.end > batch_payload.len()
1473 {
1474 return Err(JpegToHtj2kError::Validation(
1475 "compact preencoded payload range out of bounds",
1476 ));
1477 }
1478 let start = tile.preencoded_compact_payload.len();
1479 tile.preencoded_compact_payload
1480 .extend_from_slice(&batch_payload[block.payload_range.clone()]);
1481 let end = tile.preencoded_compact_payload.len();
1482 block.payload_range = start..end;
1483 }
1484 }
1485 }
1486
1487 tile.preencoded_compact_components[component_index] = Some(component);
1488 Ok(())
1489}
1490
1491#[allow(clippy::too_many_lines)]
1492fn try_store_grouped_i16_preencoded_float97_batches<A: DctToWaveletStageAccelerator>(
1493 groups: &[Vec<BatchComponentRef>],
1494 tiles: &mut [Float97BatchTile],
1495 options: &JpegToHtj2kOptions,
1496 accelerator: &mut A,
1497 timings: &mut TranscodeTimingReport,
1498) -> Result<Vec<bool>, JpegToHtj2kError> {
1499 let mut handled = vec![false; groups.len()];
1500 if !accelerator.supports_htj2k97_i16_preencoded_batch()
1501 || options.validate_against_float_reference
1502 || groups.len() <= 1
1503 {
1504 return Ok(handled);
1505 }
1506
1507 let eligible_indices = groups
1508 .iter()
1509 .enumerate()
1510 .filter_map(|(index, group)| {
1511 let eligible = group
1512 .iter()
1513 .all(|component_ref| tiles[component_ref.tile_index].decomposition_levels == 1);
1514 eligible.then_some(index)
1515 })
1516 .collect::<Vec<_>>();
1517 if eligible_indices.len() <= 1 {
1518 return Ok(handled);
1519 }
1520
1521 let codeblock_options = htj2k97_codeblock_options(&options.encode_options);
1522 let total_jobs = eligible_indices
1523 .iter()
1524 .map(|&index| groups[index].len())
1525 .sum::<usize>();
1526 record_accelerator_attempt(timings, total_jobs);
1527 let accelerator_start = Instant::now();
1528 let jobs_by_group = eligible_indices
1529 .iter()
1530 .map(|&index| i16_htj2k97_jobs_for_batch_group(&groups[index], tiles))
1531 .collect::<Result<Vec<_>, JpegToHtj2kError>>()?;
1532 let batches = jobs_by_group
1533 .iter()
1534 .map(|jobs| DctGridI16ToHtj2k97CodeBlockBatch { jobs })
1535 .collect::<Vec<_>>();
1536 let compact_grouped_components = if accelerator.supports_htj2k97_compact_preencoded_batch() {
1537 accelerator
1538 .dct_grid_i16_to_htj2k97_compact_preencoded_batch_groups(&batches, codeblock_options)
1539 .map_err(JpegToHtj2kError::Accelerator)?
1540 } else {
1541 None
1542 };
1543 if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
1544 add_dwt97_batch_stage_timings(timings, stage_timings);
1545 }
1546 if let Some(compact_grouped_components) = compact_grouped_components {
1547 timings.dct_to_wavelet_accelerator_us = timings
1548 .dct_to_wavelet_accelerator_us
1549 .saturating_add(accelerator_start.elapsed().as_micros());
1550 let compact_payload = compact_grouped_components.payload;
1551 let compact_groups = compact_grouped_components.groups;
1552 if compact_groups.len() != eligible_indices.len() {
1553 return Err(JpegToHtj2kError::Validation(
1554 "9/7 grouped i16 compact preencoded accelerator returned wrong group count",
1555 ));
1556 }
1557 for (&group_index, components) in eligible_indices.iter().zip(compact_groups) {
1558 let group = &groups[group_index];
1559 if components.len() != group.len() {
1560 return Err(JpegToHtj2kError::Validation(
1561 "9/7 grouped i16 compact preencoded accelerator returned wrong component count",
1562 ));
1563 }
1564
1565 timings.component_count = timings.component_count.saturating_add(group.len());
1566 record_batch_dispatch(timings, group.len());
1567 for (component_ref, component) in group.iter().copied().zip(components) {
1568 store_compact_preencoded_component(
1569 &mut tiles[component_ref.tile_index],
1570 component_ref.component_index,
1571 &compact_payload,
1572 component,
1573 )?;
1574 }
1575 handled[group_index] = true;
1576 }
1577 return Ok(handled);
1578 }
1579
1580 let grouped_components = accelerator
1581 .dct_grid_i16_to_htj2k97_preencoded_batch_groups(&batches, codeblock_options)
1582 .map_err(JpegToHtj2kError::Accelerator)?;
1583 if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
1584 add_dwt97_batch_stage_timings(timings, stage_timings);
1585 }
1586 timings.dct_to_wavelet_accelerator_us = timings
1587 .dct_to_wavelet_accelerator_us
1588 .saturating_add(accelerator_start.elapsed().as_micros());
1589
1590 let Some(grouped_components) = grouped_components else {
1591 return Ok(handled);
1592 };
1593 if grouped_components.len() != eligible_indices.len() {
1594 return Err(JpegToHtj2kError::Validation(
1595 "9/7 grouped i16 preencoded accelerator returned wrong group count",
1596 ));
1597 }
1598
1599 for (&group_index, components) in eligible_indices.iter().zip(grouped_components) {
1600 let group = &groups[group_index];
1601 if components.len() != group.len() {
1602 return Err(JpegToHtj2kError::Validation(
1603 "9/7 grouped i16 preencoded accelerator returned wrong component count",
1604 ));
1605 }
1606
1607 timings.component_count = timings.component_count.saturating_add(group.len());
1608 record_batch_dispatch(timings, group.len());
1609 for (component_ref, component) in group.iter().copied().zip(components) {
1610 tiles[component_ref.tile_index].preencoded_components[component_ref.component_index] =
1611 Some(component);
1612 }
1613 handled[group_index] = true;
1614 }
1615
1616 Ok(handled)
1617}
1618
1619#[allow(clippy::too_many_lines)]
1620fn try_store_prequantized_float97_batch_group<A: DctToWaveletStageAccelerator>(
1621 group: &[BatchComponentRef],
1622 tiles: &mut [Float97BatchTile],
1623 options: &JpegToHtj2kOptions,
1624 accelerator: &mut A,
1625 timings: &mut TranscodeTimingReport,
1626) -> Result<bool, JpegToHtj2kError> {
1627 if !(accelerator.supports_htj2k97_codeblock_batch()
1628 || accelerator.supports_htj2k97_i16_preencoded_batch())
1629 || options.validate_against_float_reference
1630 || group
1631 .iter()
1632 .any(|component_ref| tiles[component_ref.tile_index].decomposition_levels != 1)
1633 {
1634 return Ok(false);
1635 }
1636
1637 let codeblock_options = htj2k97_codeblock_options(&options.encode_options);
1638 if accelerator.supports_htj2k97_i16_preencoded_batch() {
1639 let jobs = i16_htj2k97_jobs_for_batch_group(group, tiles)?;
1640
1641 record_accelerator_attempt(timings, group.len());
1642 let accelerator_start = Instant::now();
1643 let compact_preencoded_components =
1644 if accelerator.supports_htj2k97_compact_preencoded_batch() {
1645 accelerator
1646 .dct_grid_i16_to_htj2k97_compact_preencoded_batch(&jobs, codeblock_options)
1647 .map_err(JpegToHtj2kError::Accelerator)?
1648 } else {
1649 None
1650 };
1651 if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
1652 add_dwt97_batch_stage_timings(timings, stage_timings);
1653 }
1654 if let Some(compact_batch) = compact_preencoded_components {
1655 timings.dct_to_wavelet_accelerator_us = timings
1656 .dct_to_wavelet_accelerator_us
1657 .saturating_add(accelerator_start.elapsed().as_micros());
1658 if compact_batch.components.len() != group.len() {
1659 return Err(JpegToHtj2kError::Validation(
1660 "9/7 i16 compact preencoded accelerator returned wrong component count",
1661 ));
1662 }
1663
1664 timings.component_count = timings.component_count.saturating_add(group.len());
1665 record_batch_dispatch(timings, group.len());
1666 for (component_ref, component) in group.iter().copied().zip(compact_batch.components) {
1667 store_compact_preencoded_component(
1668 &mut tiles[component_ref.tile_index],
1669 component_ref.component_index,
1670 &compact_batch.payload,
1671 component,
1672 )?;
1673 }
1674
1675 return Ok(true);
1676 }
1677
1678 let preencoded_components = accelerator
1679 .dct_grid_i16_to_htj2k97_preencoded_batch(&jobs, codeblock_options)
1680 .map_err(JpegToHtj2kError::Accelerator)?;
1681 if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
1682 add_dwt97_batch_stage_timings(timings, stage_timings);
1683 }
1684 timings.dct_to_wavelet_accelerator_us = timings
1685 .dct_to_wavelet_accelerator_us
1686 .saturating_add(accelerator_start.elapsed().as_micros());
1687 if let Some(components) = preencoded_components {
1688 if components.len() != group.len() {
1689 return Err(JpegToHtj2kError::Validation(
1690 "9/7 i16 preencoded accelerator returned wrong component count",
1691 ));
1692 }
1693
1694 timings.component_count = timings.component_count.saturating_add(group.len());
1695 record_batch_dispatch(timings, group.len());
1696 for (component_ref, component) in group.iter().copied().zip(components) {
1697 tiles[component_ref.tile_index].preencoded_components
1698 [component_ref.component_index] = Some(component);
1699 }
1700
1701 return Ok(true);
1702 }
1703 }
1704
1705 let repack_start = Instant::now();
1706 let block_storage = group
1707 .par_iter()
1708 .map(|component_ref| {
1709 dct_blocks_to_8x8_f64(
1710 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index]
1711 .dequantized_blocks,
1712 )
1713 })
1714 .collect::<Vec<_>>();
1715 timings.jpeg_dct_repack_us = timings
1716 .jpeg_dct_repack_us
1717 .saturating_add(repack_start.elapsed().as_micros());
1718
1719 let jobs = group
1720 .iter()
1721 .zip(block_storage.iter())
1722 .map(|(component_ref, blocks)| {
1723 let tile = &tiles[component_ref.tile_index];
1724 let component = &tile.jpeg.components[component_ref.component_index];
1725 let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
1726 validate_component_block_grid(component)?;
1727 Ok(DctGridToHtj2k97CodeBlockJob {
1728 blocks,
1729 block_cols: component.block_cols as usize,
1730 block_rows: component.block_rows as usize,
1731 width: component.width as usize,
1732 height: component.height as usize,
1733 x_rsiz,
1734 y_rsiz,
1735 })
1736 })
1737 .collect::<Result<Vec<_>, JpegToHtj2kError>>()?;
1738
1739 record_accelerator_attempt(timings, group.len());
1740 let accelerator_start = Instant::now();
1741 let preencoded_components = accelerator
1742 .dct_grid_to_htj2k97_preencoded_batch(&jobs, codeblock_options)
1743 .map_err(JpegToHtj2kError::Accelerator)?;
1744 if let Some(components) = preencoded_components {
1745 if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
1746 add_dwt97_batch_stage_timings(timings, stage_timings);
1747 }
1748 timings.dct_to_wavelet_accelerator_us = timings
1749 .dct_to_wavelet_accelerator_us
1750 .saturating_add(accelerator_start.elapsed().as_micros());
1751 if components.len() != group.len() {
1752 return Err(JpegToHtj2kError::Validation(
1753 "9/7 preencoded accelerator returned wrong component count",
1754 ));
1755 }
1756
1757 timings.component_count = timings.component_count.saturating_add(group.len());
1758 record_batch_dispatch(timings, group.len());
1759 for (component_ref, component) in group.iter().copied().zip(components) {
1760 tiles[component_ref.tile_index].preencoded_components[component_ref.component_index] =
1761 Some(component);
1762 }
1763
1764 return Ok(true);
1765 }
1766
1767 let accelerated_components = accelerator
1768 .dct_grid_to_htj2k97_codeblock_batch(&jobs, codeblock_options)
1769 .map_err(JpegToHtj2kError::Accelerator)?;
1770 if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
1771 add_dwt97_batch_stage_timings(timings, stage_timings);
1772 }
1773 timings.dct_to_wavelet_accelerator_us = timings
1774 .dct_to_wavelet_accelerator_us
1775 .saturating_add(accelerator_start.elapsed().as_micros());
1776
1777 let Some(components) = accelerated_components else {
1778 return Ok(false);
1779 };
1780 if components.len() != group.len() {
1781 return Err(JpegToHtj2kError::Validation(
1782 "9/7 code-block accelerator returned wrong component count",
1783 ));
1784 }
1785
1786 timings.component_count = timings.component_count.saturating_add(group.len());
1787 record_batch_dispatch(timings, group.len());
1788 for (component_ref, component) in group.iter().copied().zip(components) {
1789 tiles[component_ref.tile_index].prequantized_components[component_ref.component_index] =
1790 Some(component);
1791 }
1792
1793 Ok(true)
1794}
1795
1796fn htj2k97_codeblock_options(options: &JpegToHtj2kEncodeOptions) -> Htj2k97CodeBlockOptions {
1797 Htj2k97CodeBlockOptions {
1798 bit_depth: 8,
1799 guard_bits: options.guard_bits.max(2),
1800 code_block_width_exp: options.code_block_width_exp,
1801 code_block_height_exp: options.code_block_height_exp,
1802 irreversible_quantization_scale: options.irreversible_quantization_scale,
1803 irreversible_quantization_subband_scales: options.irreversible_quantization_subband_scales,
1804 }
1805}
1806
1807fn native_progression_order(
1808 progression: J2kProgressionOrder,
1809) -> j2k_native::EncodeProgressionOrder {
1810 match progression {
1811 J2kProgressionOrder::Lrcp => j2k_native::EncodeProgressionOrder::Lrcp,
1812 J2kProgressionOrder::Rlcp => j2k_native::EncodeProgressionOrder::Rlcp,
1813 J2kProgressionOrder::Rpcl => j2k_native::EncodeProgressionOrder::Rpcl,
1814 J2kProgressionOrder::Pcrl => j2k_native::EncodeProgressionOrder::Pcrl,
1815 J2kProgressionOrder::Cprl => j2k_native::EncodeProgressionOrder::Cprl,
1816 }
1817}
1818
1819fn float97_wavelets_for_batch_group<A: DctToWaveletStageAccelerator>(
1820 group: &[BatchComponentRef],
1821 tiles: &[Float97BatchTile],
1822 scratch: &mut JpegToHtj2kScratch,
1823 accelerator: &mut A,
1824 timings: &mut TranscodeTimingReport,
1825) -> Result<Vec<ComponentWavelet97>, JpegToHtj2kError> {
1826 let repack_start = Instant::now();
1827 let block_storage = group
1828 .iter()
1829 .map(|component_ref| {
1830 dct_blocks_to_8x8_f64(
1831 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index]
1832 .dequantized_blocks,
1833 )
1834 })
1835 .collect::<Vec<_>>();
1836 timings.jpeg_dct_repack_us = timings
1837 .jpeg_dct_repack_us
1838 .saturating_add(repack_start.elapsed().as_micros());
1839
1840 let jobs = group
1841 .iter()
1842 .zip(block_storage.iter())
1843 .map(|(component_ref, blocks)| {
1844 let component =
1845 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index];
1846 validate_component_block_grid(component)?;
1847 Ok(DctGridToDwt97Job {
1848 blocks,
1849 block_cols: component.block_cols as usize,
1850 block_rows: component.block_rows as usize,
1851 width: component.width as usize,
1852 height: component.height as usize,
1853 })
1854 })
1855 .collect::<Result<Vec<_>, JpegToHtj2kError>>()?;
1856
1857 record_batch_attempt(timings, group.len());
1858 let accelerator_start = Instant::now();
1859 let accelerated_first_levels = accelerator
1860 .dct_grid_to_dwt97_batch(&jobs)
1861 .map_err(JpegToHtj2kError::Accelerator)?;
1862 if let Some(stage_timings) = accelerator.last_dwt97_batch_stage_timings() {
1863 add_dwt97_batch_stage_timings(timings, stage_timings);
1864 }
1865 timings.dct_to_wavelet_accelerator_us = timings
1866 .dct_to_wavelet_accelerator_us
1867 .saturating_add(accelerator_start.elapsed().as_micros());
1868
1869 if let Some(first_levels) = accelerated_first_levels {
1870 if first_levels.len() != group.len() {
1871 return Err(JpegToHtj2kError::Validation(
1872 "9/7 batch accelerator returned wrong component count",
1873 ));
1874 }
1875 timings.component_count = timings.component_count.saturating_add(group.len());
1876 record_accelerator_dispatch(timings, group.len());
1877 let decompose_start = Instant::now();
1878 let wavelets = first_levels
1879 .into_par_iter()
1880 .zip(group.par_iter().copied())
1881 .map(|(first_level, component_ref)| {
1882 decompose_97_from_first_level(
1883 first_level,
1884 usize::from(tiles[component_ref.tile_index].decomposition_levels),
1885 )
1886 })
1887 .collect::<Vec<_>>();
1888 timings.dwt_decompose_us = timings
1889 .dwt_decompose_us
1890 .saturating_add(decompose_start.elapsed().as_micros());
1891 return Ok(wavelets);
1892 }
1893
1894 group
1895 .iter()
1896 .map(|component_ref| {
1897 float_direct_97_wavelet_from_component(
1898 &tiles[component_ref.tile_index].jpeg.components[component_ref.component_index],
1899 tiles[component_ref.tile_index].decomposition_levels,
1900 scratch,
1901 accelerator,
1902 timings,
1903 )
1904 })
1905 .collect()
1906}
1907
1908fn add_dwt97_batch_stage_timings(
1909 timings: &mut TranscodeTimingReport,
1910 stage_timings: Dwt97BatchStageTimings,
1911) {
1912 timings.dwt97_batch_pack_upload_us = timings
1913 .dwt97_batch_pack_upload_us
1914 .saturating_add(stage_timings.pack_upload_us);
1915 timings.dwt97_batch_pack_upload_transfers = timings
1916 .dwt97_batch_pack_upload_transfers
1917 .saturating_add(stage_timings.pack_upload_transfers);
1918 timings.dwt97_batch_pack_upload_bytes = timings
1919 .dwt97_batch_pack_upload_bytes
1920 .saturating_add(stage_timings.pack_upload_bytes);
1921 timings.dwt97_batch_resident_dct_handoff_count = timings
1922 .dwt97_batch_resident_dct_handoff_count
1923 .saturating_add(stage_timings.resident_dct_handoff_count);
1924 timings.dwt97_batch_idct_row_lift_us = timings
1925 .dwt97_batch_idct_row_lift_us
1926 .saturating_add(stage_timings.idct_row_lift_us);
1927 timings.dwt97_batch_column_lift_us = timings
1928 .dwt97_batch_column_lift_us
1929 .saturating_add(stage_timings.column_lift_us);
1930 timings.dwt97_batch_resident_dwt_handoff_count = timings
1931 .dwt97_batch_resident_dwt_handoff_count
1932 .saturating_add(stage_timings.resident_dwt_handoff_count);
1933 timings.dwt97_batch_quantize_codeblock_us = timings
1934 .dwt97_batch_quantize_codeblock_us
1935 .saturating_add(stage_timings.quantize_codeblock_us);
1936 timings.dwt97_batch_ht_encode_us = timings
1937 .dwt97_batch_ht_encode_us
1938 .saturating_add(stage_timings.ht_encode_us);
1939 timings.dwt97_batch_ht_kernel_us = timings
1940 .dwt97_batch_ht_kernel_us
1941 .saturating_add(stage_timings.ht_kernel_us);
1942 timings.dwt97_batch_ht_status_readback_us = timings
1943 .dwt97_batch_ht_status_readback_us
1944 .saturating_add(stage_timings.ht_status_readback_us);
1945 timings.dwt97_batch_ht_status_readback_transfers = timings
1946 .dwt97_batch_ht_status_readback_transfers
1947 .saturating_add(stage_timings.ht_status_readback_transfers);
1948 timings.dwt97_batch_ht_status_readback_bytes = timings
1949 .dwt97_batch_ht_status_readback_bytes
1950 .saturating_add(stage_timings.ht_status_readback_bytes);
1951 timings.dwt97_batch_ht_compact_us = timings
1952 .dwt97_batch_ht_compact_us
1953 .saturating_add(stage_timings.ht_compact_us);
1954 timings.dwt97_batch_ht_output_readback_us = timings
1955 .dwt97_batch_ht_output_readback_us
1956 .saturating_add(stage_timings.ht_output_readback_us);
1957 timings.dwt97_batch_ht_output_readback_transfers = timings
1958 .dwt97_batch_ht_output_readback_transfers
1959 .saturating_add(stage_timings.ht_output_readback_transfers);
1960 timings.dwt97_batch_ht_output_readback_bytes = timings
1961 .dwt97_batch_ht_output_readback_bytes
1962 .saturating_add(stage_timings.ht_output_readback_bytes);
1963 timings.dwt97_batch_ht_codeblock_dispatches = timings
1964 .dwt97_batch_ht_codeblock_dispatches
1965 .saturating_add(stage_timings.ht_codeblock_dispatches);
1966 timings.dwt97_batch_readback_us = timings
1967 .dwt97_batch_readback_us
1968 .saturating_add(stage_timings.readback_us);
1969 timings.dwt97_batch_readback_transfers = timings
1970 .dwt97_batch_readback_transfers
1971 .saturating_add(stage_timings.readback_transfers);
1972 timings.dwt97_batch_readback_bytes = timings
1973 .dwt97_batch_readback_bytes
1974 .saturating_add(stage_timings.readback_bytes);
1975}
1976
1977fn record_accelerator_attempt(timings: &mut TranscodeTimingReport, job_count: usize) {
1978 timings.accelerator_attempts = timings.accelerator_attempts.saturating_add(1);
1979 timings.accelerator_jobs = timings.accelerator_jobs.saturating_add(job_count);
1980}
1981
1982fn record_accelerator_dispatch(timings: &mut TranscodeTimingReport, job_count: usize) {
1983 timings.accelerator_dispatches = timings.accelerator_dispatches.saturating_add(1);
1984 timings.accelerator_dispatched_jobs = timings
1985 .accelerator_dispatched_jobs
1986 .saturating_add(job_count);
1987}
1988
1989fn record_batch_attempt(timings: &mut TranscodeTimingReport, job_count: usize) {
1990 timings.batch_count = timings.batch_count.saturating_add(1);
1991 timings.batch_jobs = timings.batch_jobs.saturating_add(job_count);
1992 record_accelerator_attempt(timings, job_count);
1993}
1994
1995fn record_batch_dispatch(timings: &mut TranscodeTimingReport, job_count: usize) {
1996 timings.batch_count = timings.batch_count.saturating_add(1);
1997 timings.batch_jobs = timings.batch_jobs.saturating_add(job_count);
1998 record_accelerator_dispatch(timings, job_count);
1999}
2000
2001fn record_cpu_fallback(timings: &mut TranscodeTimingReport, job_count: usize) {
2002 timings.cpu_fallback_jobs = timings.cpu_fallback_jobs.saturating_add(job_count);
2003}
2004
2005fn store_integer_batch_wavelet(
2006 component_ref: BatchComponentRef,
2007 wavelet: &IntegerWavelet,
2008 tiles: &mut [IntegerBatchTile],
2009 options: &JpegToHtj2kOptions,
2010 scratch: &mut JpegToHtj2kScratch,
2011) -> Result<(), JpegToHtj2kError> {
2012 let tile = &mut tiles[component_ref.tile_index];
2013 let component = &tile.jpeg.components[component_ref.component_index];
2014 let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
2015 let actual_coefficients = flatten_integer_wavelet(wavelet);
2016 tile.precomputed_components[component_ref.component_index] =
2017 Some(PrecomputedHtj2k53Component {
2018 x_rsiz,
2019 y_rsiz,
2020 dwt: j2k_dwt_from_integer_wavelet(wavelet),
2021 });
2022
2023 if options.validate_against_float_reference {
2024 tile.float_validation_actual
2025 .extend(actual_coefficients.clone());
2026 tile.float_validation_expected
2027 .extend(float_reference_coefficients(
2028 component,
2029 tile.decomposition_levels,
2030 scratch,
2031 )?);
2032 }
2033 if options.validate_against_integer_reference {
2034 tile.integer_validation_actual.extend(actual_coefficients);
2035 tile.integer_validation_expected
2036 .extend(integer_reference_coefficients(
2037 component,
2038 tile.decomposition_levels,
2039 )?);
2040 }
2041
2042 Ok(())
2043}
2044
2045fn store_float97_batch_wavelet(
2046 component_ref: BatchComponentRef,
2047 wavelet: &ComponentWavelet97,
2048 tiles: &mut [Float97BatchTile],
2049 options: &JpegToHtj2kOptions,
2050 scratch: &mut JpegToHtj2kScratch,
2051) -> Result<(), JpegToHtj2kError> {
2052 let tile = &mut tiles[component_ref.tile_index];
2053 let component = &tile.jpeg.components[component_ref.component_index];
2054 let (x_rsiz, y_rsiz) = tile.component_sampling[component_ref.component_index];
2055 tile.precomputed_components[component_ref.component_index] =
2056 Some(PrecomputedHtj2k97Component {
2057 x_rsiz,
2058 y_rsiz,
2059 dwt: j2k_dwt97_from_wavelet(
2060 wavelet,
2061 component.width as usize,
2062 component.height as usize,
2063 ),
2064 });
2065
2066 if options.validate_against_float_reference {
2067 let actual_coefficients = rounded_wavelet97_i32(wavelet)?;
2068 tile.float_validation_actual.extend(actual_coefficients);
2069 tile.float_validation_expected
2070 .extend(float97_reference_coefficients(
2071 component,
2072 tile.decomposition_levels,
2073 scratch,
2074 )?);
2075 }
2076
2077 Ok(())
2078}
2079
2080fn record_encode_dispatch_delta(
2081 timings: &mut TranscodeTimingReport,
2082 before: J2kEncodeDispatchReport,
2083 after: J2kEncodeDispatchReport,
2084) {
2085 let delta = after.saturating_delta(before);
2086 timings.htj2k_encode_accelerator_dispatches = timings
2087 .htj2k_encode_accelerator_dispatches
2088 .saturating_add(delta.total());
2089 timings.htj2k_encode_ht_code_block_dispatches = timings
2090 .htj2k_encode_ht_code_block_dispatches
2091 .saturating_add(delta.ht_code_block);
2092 timings.htj2k_encode_packetization_dispatches = timings
2093 .htj2k_encode_packetization_dispatches
2094 .saturating_add(delta.packetization);
2095}
2096
2097fn add_encode_timing_counters_from_result(
2098 timings: &mut TranscodeTimingReport,
2099 tile: &Result<EncodedTranscode, JpegToHtj2kError>,
2100) {
2101 let Ok(tile) = tile else {
2102 return;
2103 };
2104 timings.htj2k_encode_accelerator_dispatches = timings
2105 .htj2k_encode_accelerator_dispatches
2106 .saturating_add(tile.report.timings.htj2k_encode_accelerator_dispatches);
2107 timings.htj2k_encode_ht_code_block_dispatches = timings
2108 .htj2k_encode_ht_code_block_dispatches
2109 .saturating_add(tile.report.timings.htj2k_encode_ht_code_block_dispatches);
2110 timings.htj2k_encode_packetization_dispatches = timings
2111 .htj2k_encode_packetization_dispatches
2112 .saturating_add(tile.report.timings.htj2k_encode_packetization_dispatches);
2113}
2114
2115fn encode_integer_prepared_tiles<E: J2kEncodeStageAccelerator>(
2116 prepared_tiles: Vec<IntegerBatchTile>,
2117 options: &JpegToHtj2kOptions,
2118 encode_accelerator: &mut E,
2119) -> Vec<(usize, Result<EncodedTranscode, JpegToHtj2kError>)> {
2120 if encode_accelerator.prefer_parallel_cpu_tile_encode() {
2121 return prepared_tiles
2122 .into_par_iter()
2123 .map(|prepared| {
2124 let tile_index = prepared.tile_index;
2125 let mut cpu_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
2126 (
2127 tile_index,
2128 encode_integer_batch_tile(prepared, options, &mut cpu_accelerator),
2129 )
2130 })
2131 .collect();
2132 }
2133
2134 prepared_tiles
2135 .into_iter()
2136 .map(|prepared| {
2137 let tile_index = prepared.tile_index;
2138 (
2139 tile_index,
2140 encode_integer_batch_tile(prepared, options, encode_accelerator),
2141 )
2142 })
2143 .collect()
2144}
2145
2146fn encode_float97_prepared_tiles<E: J2kEncodeStageAccelerator>(
2147 prepared_tiles: Vec<Float97BatchTile>,
2148 options: &JpegToHtj2kOptions,
2149 encode_accelerator: &mut E,
2150) -> Vec<(usize, Result<EncodedTranscode, JpegToHtj2kError>)> {
2151 if !encode_accelerator.prefer_parallel_cpu_tile_encode()
2152 && can_encode_float97_precomputed_tiles_batch(&prepared_tiles, options)
2153 {
2154 return encode_float97_precomputed_tiles_batch(prepared_tiles, options, encode_accelerator);
2155 }
2156
2157 if encode_accelerator.prefer_parallel_cpu_tile_encode() {
2158 return prepared_tiles
2159 .into_par_iter()
2160 .map(|prepared| {
2161 let tile_index = prepared.tile_index;
2162 let mut cpu_accelerator = CpuOnlyJ2kEncodeStageAccelerator;
2163 (
2164 tile_index,
2165 encode_float97_batch_tile(prepared, options, &mut cpu_accelerator),
2166 )
2167 })
2168 .collect();
2169 }
2170
2171 prepared_tiles
2172 .into_iter()
2173 .map(|prepared| {
2174 let tile_index = prepared.tile_index;
2175 (
2176 tile_index,
2177 encode_float97_batch_tile(prepared, options, encode_accelerator),
2178 )
2179 })
2180 .collect()
2181}
2182
2183fn can_encode_float97_precomputed_tiles_batch(
2184 prepared_tiles: &[Float97BatchTile],
2185 options: &JpegToHtj2kOptions,
2186) -> bool {
2187 options.encode_options.num_layers == 1
2188 && prepared_tiles.iter().all(|tile| {
2189 tile.precomputed_components.iter().all(Option::is_some)
2190 && tile.preencoded_compact_payload.is_empty()
2191 && tile
2192 .preencoded_compact_components
2193 .iter()
2194 .all(Option::is_none)
2195 && tile.preencoded_components.iter().all(Option::is_none)
2196 && tile.prequantized_components.iter().all(Option::is_none)
2197 })
2198}
2199
2200#[allow(clippy::too_many_lines)]
2201fn encode_float97_precomputed_tiles_batch<E: J2kEncodeStageAccelerator>(
2202 prepared_tiles: Vec<Float97BatchTile>,
2203 options: &JpegToHtj2kOptions,
2204 encode_accelerator: &mut E,
2205) -> Vec<(usize, Result<EncodedTranscode, JpegToHtj2kError>)> {
2206 let mut records = Vec::with_capacity(prepared_tiles.len());
2207 let mut images = Vec::with_capacity(prepared_tiles.len());
2208
2209 for tile in prepared_tiles {
2210 let Float97BatchTile {
2211 tile_index,
2212 jpeg,
2213 decomposition_levels,
2214 all_unit_sampled,
2215 component_reports,
2216 precomputed_components,
2217 preencoded_compact_payload: _,
2218 preencoded_compact_components: _,
2219 preencoded_components: _,
2220 prequantized_components: _,
2221 float_validation_actual,
2222 float_validation_expected,
2223 timings,
2224 ..
2225 } = tile;
2226 let components = match precomputed_components
2227 .into_iter()
2228 .map(|component| {
2229 component.ok_or(JpegToHtj2kError::Validation(
2230 "9/7 precomputed batch transcode did not produce all components",
2231 ))
2232 })
2233 .collect::<Result<Vec<_>, _>>()
2234 {
2235 Ok(components) => components,
2236 Err(error) => return vec![(tile_index, Err(error))],
2237 };
2238 images.push(PrecomputedHtj2k97Image {
2239 width: jpeg.width,
2240 height: jpeg.height,
2241 bit_depth: 8,
2242 signed: false,
2243 components,
2244 });
2245 records.push(Float97PrecomputedBatchRecord {
2246 tile_index,
2247 jpeg,
2248 decomposition_levels,
2249 all_unit_sampled,
2250 component_reports,
2251 float_validation_actual,
2252 float_validation_expected,
2253 timings,
2254 });
2255 }
2256
2257 let encode_start = Instant::now();
2258 let encode_dispatch_before = encode_accelerator.dispatch_report();
2259 let native_images = images;
2260 let codestreams = {
2261 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2262 let native_encode_options = options.encode_options.to_native();
2263 match encode_precomputed_htj2k_97_batch_with_accelerator(
2264 &native_images,
2265 &native_encode_options,
2266 &mut native_encode_accelerator,
2267 ) {
2268 Ok(codestreams) => codestreams,
2269 Err(error) => {
2270 return records
2271 .into_iter()
2272 .map(|record| (record.tile_index, Err(JpegToHtj2kError::Encode(error))))
2273 .collect();
2274 }
2275 }
2276 };
2277 let encode_dispatch_after = encode_accelerator.dispatch_report();
2278 let encode_us = encode_start.elapsed().as_micros();
2279
2280 if codestreams.len() != records.len() {
2281 return records
2282 .into_iter()
2283 .map(|record| {
2284 (
2285 record.tile_index,
2286 Err(JpegToHtj2kError::Validation(
2287 "9/7 precomputed batch encode returned the wrong tile count",
2288 )),
2289 )
2290 })
2291 .collect();
2292 }
2293
2294 records
2295 .into_iter()
2296 .zip(codestreams)
2297 .enumerate()
2298 .map(|(batch_index, (record, codestream))| {
2299 let encode_measurement = (batch_index == 0).then_some((
2300 encode_dispatch_before,
2301 encode_dispatch_after,
2302 encode_us,
2303 ));
2304 (
2305 record.tile_index,
2306 encoded_float97_precomputed_batch_record(
2307 record,
2308 codestream,
2309 options,
2310 encode_measurement,
2311 ),
2312 )
2313 })
2314 .collect()
2315}
2316
2317fn encoded_float97_precomputed_batch_record(
2318 record: Float97PrecomputedBatchRecord,
2319 codestream: Vec<u8>,
2320 options: &JpegToHtj2kOptions,
2321 encode_measurement: Option<(J2kEncodeDispatchReport, J2kEncodeDispatchReport, u128)>,
2322) -> Result<EncodedTranscode, JpegToHtj2kError> {
2323 let Float97PrecomputedBatchRecord {
2324 jpeg,
2325 decomposition_levels,
2326 all_unit_sampled,
2327 component_reports,
2328 float_validation_actual,
2329 float_validation_expected,
2330 mut timings,
2331 ..
2332 } = record;
2333
2334 if let Some((encode_dispatch_before, encode_dispatch_after, encode_us)) = encode_measurement {
2335 record_encode_dispatch_delta(&mut timings, encode_dispatch_before, encode_dispatch_after);
2336 timings.htj2k_encode_us = encode_us;
2337 }
2338 let encode_us = timings.htj2k_encode_us;
2339 let float_reference_metrics = if options.validate_against_float_reference {
2340 Some(error_metrics_i32(
2341 &float_validation_actual,
2342 &float_validation_expected,
2343 )?)
2344 } else {
2345 None
2346 };
2347
2348 Ok(EncodedTranscode {
2349 codestream,
2350 report: TranscodeReport {
2351 width: jpeg.width,
2352 height: jpeg.height,
2353 component_count: jpeg.components.len(),
2354 components: component_reports,
2355 float_reference_classification: float_reference_metrics
2356 .as_ref()
2357 .map(TranscodeValidationClassification::classify_metrics),
2358 float_reference_metrics,
2359 integer_reference_classification: None,
2360 integer_reference_metrics: None,
2361 decomposition_levels,
2362 coefficient_path: options.coefficient_path,
2363 path: transcode_path_name(all_unit_sampled, options.coefficient_path),
2364 extract_us: timings.jpeg_dct_extract_us,
2365 transform_us: 0,
2366 encode_us,
2367 timings,
2368 },
2369 })
2370}
2371
2372fn encode_integer_batch_tile<E: J2kEncodeStageAccelerator>(
2373 tile: IntegerBatchTile,
2374 options: &JpegToHtj2kOptions,
2375 encode_accelerator: &mut E,
2376) -> Result<EncodedTranscode, JpegToHtj2kError> {
2377 let mut timings = tile.timings;
2378 let components = tile
2379 .precomputed_components
2380 .into_iter()
2381 .map(|component| {
2382 component.ok_or(JpegToHtj2kError::Validation(
2383 "integer batch transcode did not produce all components",
2384 ))
2385 })
2386 .collect::<Result<Vec<_>, _>>()?;
2387 let encode_start = Instant::now();
2388 let precomputed = PrecomputedHtj2k53Image {
2389 width: tile.jpeg.width,
2390 height: tile.jpeg.height,
2391 bit_depth: 8,
2392 signed: false,
2393 components,
2394 };
2395 let encode_dispatch_before = encode_accelerator.dispatch_report();
2396 let native_precomputed = precomputed;
2397 let codestream = {
2398 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2399 let native_encode_options = options.encode_options.to_native();
2400 encode_precomputed_htj2k_53_with_accelerator(
2401 &native_precomputed,
2402 &native_encode_options,
2403 &mut native_encode_accelerator,
2404 )
2405 .map_err(JpegToHtj2kError::Encode)?
2406 };
2407 record_encode_dispatch_delta(
2408 &mut timings,
2409 encode_dispatch_before,
2410 encode_accelerator.dispatch_report(),
2411 );
2412 let encode_us = encode_start.elapsed().as_micros();
2413 timings.htj2k_encode_us = encode_us;
2414 let integer_reference_metrics = if options.validate_against_integer_reference {
2415 Some(error_metrics_i32(
2416 &tile.integer_validation_actual,
2417 &tile.integer_validation_expected,
2418 )?)
2419 } else {
2420 None
2421 };
2422 let float_reference_metrics = if options.validate_against_float_reference {
2423 Some(error_metrics_i32(
2424 &tile.float_validation_actual,
2425 &tile.float_validation_expected,
2426 )?)
2427 } else {
2428 None
2429 };
2430
2431 Ok(EncodedTranscode {
2432 codestream,
2433 report: TranscodeReport {
2434 width: tile.jpeg.width,
2435 height: tile.jpeg.height,
2436 component_count: tile.jpeg.components.len(),
2437 components: tile.component_reports,
2438 float_reference_classification: float_reference_metrics
2439 .as_ref()
2440 .map(TranscodeValidationClassification::classify_metrics),
2441 float_reference_metrics,
2442 integer_reference_classification: integer_reference_metrics
2443 .as_ref()
2444 .map(TranscodeValidationClassification::classify_metrics),
2445 integer_reference_metrics,
2446 decomposition_levels: tile.decomposition_levels,
2447 coefficient_path: options.coefficient_path,
2448 path: transcode_path_name(tile.all_unit_sampled, options.coefficient_path),
2449 extract_us: timings.jpeg_dct_extract_us,
2450 transform_us: 0,
2451 encode_us,
2452 timings,
2453 },
2454 })
2455}
2456
2457#[allow(clippy::too_many_lines)]
2458fn encode_float97_batch_tile<E: J2kEncodeStageAccelerator>(
2459 tile: Float97BatchTile,
2460 options: &JpegToHtj2kOptions,
2461 encode_accelerator: &mut E,
2462) -> Result<EncodedTranscode, JpegToHtj2kError> {
2463 let Float97BatchTile {
2464 jpeg,
2465 decomposition_levels,
2466 all_unit_sampled,
2467 component_reports,
2468 precomputed_components,
2469 preencoded_compact_payload,
2470 preencoded_compact_components,
2471 preencoded_components,
2472 prequantized_components,
2473 float_validation_actual,
2474 float_validation_expected,
2475 mut timings,
2476 ..
2477 } = tile;
2478
2479 let encode_start = Instant::now();
2480 let encode_dispatch_before = encode_accelerator.dispatch_report();
2481 let codestream = {
2482 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2483 let native_encode_options = options.encode_options.to_native();
2484 if preencoded_compact_components.iter().any(Option::is_some) {
2485 let components = preencoded_compact_components
2486 .into_iter()
2487 .map(|component| {
2488 component.ok_or(JpegToHtj2kError::Validation(
2489 "9/7 compact preencoded batch transcode did not produce all components",
2490 ))
2491 })
2492 .collect::<Result<Vec<_>, _>>()?;
2493 let preencoded = PreencodedHtj2k97CompactImage {
2494 width: jpeg.width,
2495 height: jpeg.height,
2496 bit_depth: 8,
2497 signed: false,
2498 payload: preencoded_compact_payload,
2499 components,
2500 };
2501 encode_preencoded_htj2k_97_compact_owned_with_accelerator(
2502 preencoded,
2503 &native_encode_options,
2504 &mut native_encode_accelerator,
2505 )
2506 .map_err(JpegToHtj2kError::Encode)?
2507 } else if preencoded_components.iter().any(Option::is_some) {
2508 let components = preencoded_components
2509 .into_iter()
2510 .map(|component| {
2511 component.ok_or(JpegToHtj2kError::Validation(
2512 "9/7 preencoded batch transcode did not produce all components",
2513 ))
2514 })
2515 .collect::<Result<Vec<_>, _>>()?;
2516 let preencoded = PreencodedHtj2k97Image {
2517 width: jpeg.width,
2518 height: jpeg.height,
2519 bit_depth: 8,
2520 signed: false,
2521 components,
2522 };
2523 encode_preencoded_htj2k_97_owned_with_accelerator(
2524 preencoded,
2525 &native_encode_options,
2526 &mut native_encode_accelerator,
2527 )
2528 .map_err(JpegToHtj2kError::Encode)?
2529 } else if prequantized_components.iter().any(Option::is_some) {
2530 let components = prequantized_components
2531 .into_iter()
2532 .map(|component| {
2533 component.ok_or(JpegToHtj2kError::Validation(
2534 "9/7 code-block batch transcode did not produce all components",
2535 ))
2536 })
2537 .collect::<Result<Vec<_>, _>>()?;
2538 let prequantized = PrequantizedHtj2k97Image {
2539 width: jpeg.width,
2540 height: jpeg.height,
2541 bit_depth: 8,
2542 signed: false,
2543 components,
2544 };
2545 let native_prequantized = prequantized;
2546 encode_prequantized_htj2k_97_with_accelerator(
2547 &native_prequantized,
2548 &native_encode_options,
2549 &mut native_encode_accelerator,
2550 )
2551 .map_err(JpegToHtj2kError::Encode)?
2552 } else {
2553 let components = precomputed_components
2554 .into_iter()
2555 .map(|component| {
2556 component.ok_or(JpegToHtj2kError::Validation(
2557 "9/7 batch transcode did not produce all components",
2558 ))
2559 })
2560 .collect::<Result<Vec<_>, _>>()?;
2561 let precomputed = PrecomputedHtj2k97Image {
2562 width: jpeg.width,
2563 height: jpeg.height,
2564 bit_depth: 8,
2565 signed: false,
2566 components,
2567 };
2568 let native_precomputed = precomputed;
2569 encode_precomputed_htj2k_97_with_accelerator(
2570 &native_precomputed,
2571 &native_encode_options,
2572 &mut native_encode_accelerator,
2573 )
2574 .map_err(JpegToHtj2kError::Encode)?
2575 }
2576 };
2577 record_encode_dispatch_delta(
2578 &mut timings,
2579 encode_dispatch_before,
2580 encode_accelerator.dispatch_report(),
2581 );
2582 let encode_us = encode_start.elapsed().as_micros();
2583 timings.htj2k_encode_us = encode_us;
2584 let float_reference_metrics = if options.validate_against_float_reference {
2585 Some(error_metrics_i32(
2586 &float_validation_actual,
2587 &float_validation_expected,
2588 )?)
2589 } else {
2590 None
2591 };
2592
2593 Ok(EncodedTranscode {
2594 codestream,
2595 report: TranscodeReport {
2596 width: jpeg.width,
2597 height: jpeg.height,
2598 component_count: jpeg.components.len(),
2599 components: component_reports,
2600 float_reference_classification: float_reference_metrics
2601 .as_ref()
2602 .map(TranscodeValidationClassification::classify_metrics),
2603 float_reference_metrics,
2604 integer_reference_classification: None,
2605 integer_reference_metrics: None,
2606 decomposition_levels,
2607 coefficient_path: options.coefficient_path,
2608 path: transcode_path_name(all_unit_sampled, options.coefficient_path),
2609 extract_us: timings.jpeg_dct_extract_us,
2610 transform_us: 0,
2611 encode_us,
2612 timings,
2613 },
2614 })
2615}
2616
2617#[allow(clippy::too_many_lines)]
2618fn jpeg_to_htj2k_with_scratch<A: DctToWaveletStageAccelerator, E: J2kEncodeStageAccelerator>(
2619 bytes: &[u8],
2620 options: &JpegToHtj2kOptions,
2621 scratch: &mut JpegToHtj2kScratch,
2622 accelerator: &mut A,
2623 encode_accelerator: &mut E,
2624) -> Result<EncodedTranscode, JpegToHtj2kError> {
2625 validate_transcode_options(options)?;
2626 let mut timings = TranscodeTimingReport {
2627 tile_count: 1,
2628 ..TranscodeTimingReport::default()
2629 };
2630
2631 let extract_start = Instant::now();
2632 let jpeg = extract_dct_blocks(bytes, DctExtractOptions::default())?;
2633 let extract_us = extract_start.elapsed().as_micros();
2634 timings.jpeg_dct_extract_us = extract_us;
2635
2636 if jpeg.components.is_empty() || jpeg.components.len() > 4 {
2637 return Err(JpegToHtj2kError::Unsupported(
2638 "unsupported JPEG component count for jpeg_to_htj2k",
2639 ));
2640 }
2641 let component_sampling =
2642 component_sampling_for_jpeg(&jpeg.components, jpeg.width, jpeg.height)?;
2643 let decomposition_levels = decomposition_levels_for_components(
2644 &jpeg.components,
2645 options.encode_options.num_decomposition_levels,
2646 )?;
2647 let all_unit_sampled = component_sampling
2648 .iter()
2649 .all(|&(x_rsiz, y_rsiz)| x_rsiz == 1 && y_rsiz == 1);
2650 let component_reports = jpeg
2651 .components
2652 .iter()
2653 .zip(component_sampling.iter().copied())
2654 .map(|(component, (x_rsiz, y_rsiz))| TranscodeComponentReport {
2655 component_index: component.component_index,
2656 width: component.width,
2657 height: component.height,
2658 block_cols: component.block_cols,
2659 block_rows: component.block_rows,
2660 x_rsiz,
2661 y_rsiz,
2662 })
2663 .collect();
2664
2665 let transform_start = Instant::now();
2666 let component_batch = transcode_component_batch(
2667 &jpeg.components,
2668 &component_sampling,
2669 decomposition_levels,
2670 options,
2671 scratch,
2672 accelerator,
2673 &mut timings,
2674 )?;
2675 let transform_us = transform_start.elapsed().as_micros();
2676 timings.dct_to_wavelet_total_us = transform_us;
2677
2678 let encode_start = Instant::now();
2679 let encode_dispatch_before = encode_accelerator.dispatch_report();
2680 let native_encode_options = options.encode_options.to_native();
2681 let codestream = match component_batch.precomputed_components {
2682 PrecomputedComponentBatch::Dwt53(components) => {
2683 let precomputed = PrecomputedHtj2k53Image {
2684 width: jpeg.width,
2685 height: jpeg.height,
2686 bit_depth: 8,
2687 signed: false,
2688 components,
2689 };
2690 let native_precomputed = precomputed;
2691 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2692 encode_precomputed_htj2k_53_with_accelerator(
2693 &native_precomputed,
2694 &native_encode_options,
2695 &mut native_encode_accelerator,
2696 )
2697 .map_err(JpegToHtj2kError::Encode)?
2698 }
2699 PrecomputedComponentBatch::Dwt97(components) => {
2700 let precomputed = PrecomputedHtj2k97Image {
2701 width: jpeg.width,
2702 height: jpeg.height,
2703 bit_depth: 8,
2704 signed: false,
2705 components,
2706 };
2707 let native_precomputed = precomputed;
2708 let mut native_encode_accelerator = NativeEncodeStageAdapter::new(encode_accelerator);
2709 encode_precomputed_htj2k_97_with_accelerator(
2710 &native_precomputed,
2711 &native_encode_options,
2712 &mut native_encode_accelerator,
2713 )
2714 .map_err(JpegToHtj2kError::Encode)?
2715 }
2716 };
2717 record_encode_dispatch_delta(
2718 &mut timings,
2719 encode_dispatch_before,
2720 encode_accelerator.dispatch_report(),
2721 );
2722 let encode_us = encode_start.elapsed().as_micros();
2723 timings.htj2k_encode_us = encode_us;
2724
2725 Ok(EncodedTranscode {
2726 codestream,
2727 report: TranscodeReport {
2728 width: jpeg.width,
2729 height: jpeg.height,
2730 component_count: jpeg.components.len(),
2731 components: component_reports,
2732 float_reference_classification: component_batch
2733 .float_reference_metrics
2734 .as_ref()
2735 .map(TranscodeValidationClassification::classify_metrics),
2736 float_reference_metrics: component_batch.float_reference_metrics,
2737 integer_reference_classification: component_batch
2738 .integer_reference_metrics
2739 .as_ref()
2740 .map(TranscodeValidationClassification::classify_metrics),
2741 integer_reference_metrics: component_batch.integer_reference_metrics,
2742 decomposition_levels,
2743 coefficient_path: options.coefficient_path,
2744 path: transcode_path_name(all_unit_sampled, options.coefficient_path),
2745 extract_us,
2746 transform_us,
2747 encode_us,
2748 timings,
2749 },
2750 })
2751}
2752
2753fn validate_transcode_options(options: &JpegToHtj2kOptions) -> Result<(), JpegToHtj2kError> {
2754 if !options.encode_options.use_ht_block_coding {
2755 return Err(JpegToHtj2kError::Unsupported(
2756 "jpeg_to_htj2k requires HT block coding",
2757 ));
2758 }
2759 if options.encode_options.use_mct {
2760 return Err(JpegToHtj2kError::Unsupported(
2761 "jpeg_to_htj2k requires use_mct=false because JPEG components stay in native color space",
2762 ));
2763 }
2764
2765 match (options.coefficient_path, options.encode_options.reversible) {
2766 (
2767 JpegToHtj2kCoefficientPath::IntegerDirect53
2768 | JpegToHtj2kCoefficientPath::FloatDirectLinear53,
2769 true,
2770 )
2771 | (JpegToHtj2kCoefficientPath::FloatDirectLinear97, false) => Ok(()),
2772 (
2773 JpegToHtj2kCoefficientPath::IntegerDirect53
2774 | JpegToHtj2kCoefficientPath::FloatDirectLinear53,
2775 false,
2776 ) => Err(JpegToHtj2kError::Unsupported(
2777 "5/3 coefficient path requires reversible HTJ2K encode",
2778 )),
2779 (JpegToHtj2kCoefficientPath::FloatDirectLinear97, true) => {
2780 Err(JpegToHtj2kError::Unsupported(
2781 "9/7 coefficient path requires irreversible HTJ2K encode",
2782 ))
2783 }
2784 }
2785}
2786
/// Outcome of converting all components of one image to wavelet data.
///
/// Produced by `transcode_component_batch` / `transcode_integer_component_batch`
/// and consumed by `jpeg_to_htj2k_with_scratch`.
struct ComponentTranscodeBatch {
    /// Precomputed wavelet components, tagged with the transform family.
    precomputed_components: PrecomputedComponentBatch,
    /// Metrics against the float reference; `Some` only when
    /// `validate_against_float_reference` is enabled.
    float_reference_metrics: Option<TranscodeValidationMetrics>,
    /// Metrics against the integer reference; `Some` only when
    /// `validate_against_integer_reference` is enabled.
    integer_reference_metrics: Option<TranscodeValidationMetrics>,
}
2792
/// Precomputed wavelet components for a whole image, by transform family.
enum PrecomputedComponentBatch {
    /// 5/3 components; encoded with `encode_precomputed_htj2k_53_with_accelerator`.
    Dwt53(Vec<PrecomputedHtj2k53Component>),
    /// 9/7 components; encoded with `encode_precomputed_htj2k_97_with_accelerator`.
    Dwt97(Vec<PrecomputedHtj2k97Component>),
}
2797
/// Outcome of converting a single component, as returned by
/// `component_to_precomputed_htj2k`.
struct ComponentTranscodeResult {
    /// The component's precomputed wavelet data.
    precomputed: PrecomputedComponent,
    /// `(actual, expected)` coefficient lists for float-reference validation,
    /// when produced.
    float_validation_coefficients: Option<(Vec<i32>, Vec<i32>)>,
    /// `(actual, expected)` coefficient lists for integer-reference validation,
    /// when produced.
    integer_validation_coefficients: Option<(Vec<i32>, Vec<i32>)>,
}
2803
/// Precomputed wavelet data for one component, by transform family.
enum PrecomputedComponent {
    /// Component produced by a 5/3 coefficient path.
    Dwt53(PrecomputedHtj2k53Component),
    /// Component produced by the 9/7 coefficient path.
    Dwt97(PrecomputedHtj2k97Component),
}
2808
/// Floating-point 5/3 wavelet decomposition of one component.
///
/// NOTE(review): field meanings below are inferred from the names and types;
/// confirm against the code that builds this struct.
struct ComponentWavelet {
    /// Samples of the final LL band (presumably row-major; verify).
    final_ll: Vec<f64>,
    /// Width of the final LL band.
    final_ll_width: usize,
    /// Height of the final LL band.
    final_ll_height: usize,
    /// One 5/3 decomposition result per level (ordering not visible here).
    levels: Vec<Dwt53TwoDimensional<f64>>,
}
2815
/// Floating-point 9/7 wavelet decomposition of one component.
///
/// Passed to `j2k_dwt97_from_wavelet` and `rounded_wavelet97_i32` by
/// `store_float97_batch_wavelet`.
///
/// NOTE(review): field meanings below are inferred from the names and types;
/// confirm against the code that builds this struct.
struct ComponentWavelet97 {
    /// Samples of the final LL band (presumably row-major; verify).
    final_ll: Vec<f64>,
    /// Width of the final LL band.
    final_ll_width: usize,
    /// Height of the final LL band.
    final_ll_height: usize,
    /// One 9/7 decomposition result per level (ordering not visible here).
    levels: Vec<Dwt97TwoDimensional<f64>>,
}
2822
/// One level of an integer wavelet decomposition: geometry plus the three
/// detail subbands.
///
/// NOTE(review): field meanings below are inferred from the names; confirm
/// against the code that fills this struct.
struct IntegerWaveletLevel {
    /// Width of the plane decomposed at this level (presumably; verify).
    width: usize,
    /// Height of the plane decomposed at this level (presumably; verify).
    height: usize,
    /// Number of low-pass columns.
    low_width: usize,
    /// Number of low-pass rows.
    low_height: usize,
    /// Number of high-pass columns.
    high_width: usize,
    /// Number of high-pass rows.
    high_height: usize,
    /// HL subband coefficients.
    hl: Vec<i32>,
    /// LH subband coefficients.
    lh: Vec<i32>,
    /// HH subband coefficients.
    hh: Vec<i32>,
}
2834
/// Integer wavelet decomposition of one component.
///
/// Consumed by `flatten_integer_wavelet` and `j2k_dwt_from_integer_wavelet`
/// in `transcode_integer_component_batch`.
struct IntegerWavelet {
    /// Coefficients of the final LL band (presumably row-major; verify).
    final_ll: Vec<i32>,
    /// Width of the final LL band.
    final_ll_width: usize,
    /// Height of the final LL band.
    final_ll_height: usize,
    /// Detail subbands per decomposition level (ordering not visible here).
    levels: Vec<IntegerWaveletLevel>,
}
2841
2842fn transcode_component_batch(
2843 components: &[JpegDctComponent],
2844 component_sampling: &[(u8, u8)],
2845 decomposition_levels: u8,
2846 options: &JpegToHtj2kOptions,
2847 scratch: &mut JpegToHtj2kScratch,
2848 accelerator: &mut impl DctToWaveletStageAccelerator,
2849 timings: &mut TranscodeTimingReport,
2850) -> Result<ComponentTranscodeBatch, JpegToHtj2kError> {
2851 if matches!(
2852 options.coefficient_path,
2853 JpegToHtj2kCoefficientPath::FloatDirectLinear97
2854 ) && options.validate_against_integer_reference
2855 {
2856 return Err(JpegToHtj2kError::Unsupported(
2857 "integer reversible validation is only defined for 5/3 coefficient paths",
2858 ));
2859 }
2860
2861 if matches!(
2862 options.coefficient_path,
2863 JpegToHtj2kCoefficientPath::IntegerDirect53
2864 ) {
2865 return transcode_integer_component_batch(
2866 components,
2867 component_sampling,
2868 decomposition_levels,
2869 options,
2870 scratch,
2871 accelerator,
2872 timings,
2873 );
2874 }
2875
2876 let mut precomputed_53 = Vec::with_capacity(components.len());
2877 let mut precomputed_97 = Vec::with_capacity(components.len());
2878 let mut float_validation_actual = Vec::new();
2879 let mut float_validation_expected = Vec::new();
2880 let mut integer_validation_actual = Vec::new();
2881 let mut integer_validation_expected = Vec::new();
2882
2883 for (component, (x_rsiz, y_rsiz)) in components.iter().zip(component_sampling.iter().copied()) {
2884 let component_result = component_to_precomputed_htj2k(
2885 component,
2886 x_rsiz,
2887 y_rsiz,
2888 decomposition_levels,
2889 options,
2890 scratch,
2891 accelerator,
2892 timings,
2893 )?;
2894 match component_result.precomputed {
2895 PrecomputedComponent::Dwt53(precomputed) => precomputed_53.push(precomputed),
2896 PrecomputedComponent::Dwt97(precomputed) => precomputed_97.push(precomputed),
2897 }
2898 if let Some((actual, expected)) = component_result.float_validation_coefficients {
2899 float_validation_actual.extend(actual);
2900 float_validation_expected.extend(expected);
2901 }
2902 if let Some((actual, expected)) = component_result.integer_validation_coefficients {
2903 integer_validation_actual.extend(actual);
2904 integer_validation_expected.extend(expected);
2905 }
2906 }
2907
2908 let float_reference_metrics = if options.validate_against_float_reference {
2909 Some(error_metrics_i32(
2910 &float_validation_actual,
2911 &float_validation_expected,
2912 )?)
2913 } else {
2914 None
2915 };
2916 let integer_reference_metrics = if options.validate_against_integer_reference {
2917 Some(error_metrics_i32(
2918 &integer_validation_actual,
2919 &integer_validation_expected,
2920 )?)
2921 } else {
2922 None
2923 };
2924
2925 let precomputed_components = if matches!(
2926 options.coefficient_path,
2927 JpegToHtj2kCoefficientPath::FloatDirectLinear97
2928 ) {
2929 PrecomputedComponentBatch::Dwt97(precomputed_97)
2930 } else {
2931 PrecomputedComponentBatch::Dwt53(precomputed_53)
2932 };
2933
2934 Ok(ComponentTranscodeBatch {
2935 precomputed_components,
2936 float_reference_metrics,
2937 integer_reference_metrics,
2938 })
2939}
2940
2941fn transcode_integer_component_batch(
2942 components: &[JpegDctComponent],
2943 component_sampling: &[(u8, u8)],
2944 decomposition_levels: u8,
2945 options: &JpegToHtj2kOptions,
2946 scratch: &mut JpegToHtj2kScratch,
2947 accelerator: &mut impl DctToWaveletStageAccelerator,
2948 timings: &mut TranscodeTimingReport,
2949) -> Result<ComponentTranscodeBatch, JpegToHtj2kError> {
2950 let mut precomputed_53: Vec<Option<PrecomputedHtj2k53Component>> =
2951 (0..components.len()).map(|_| None).collect();
2952 let mut float_validation_actual = Vec::new();
2953 let mut float_validation_expected = Vec::new();
2954 let mut integer_validation_actual = Vec::new();
2955 let mut integer_validation_expected = Vec::new();
2956
2957 for group in same_geometry_component_groups(components) {
2958 let group_wavelets = integer_wavelets_for_component_group(
2959 &group,
2960 components,
2961 decomposition_levels,
2962 scratch,
2963 accelerator,
2964 timings,
2965 )?;
2966 for (component_index, wavelet) in group.into_iter().zip(group_wavelets) {
2967 let component = &components[component_index];
2968 let (x_rsiz, y_rsiz) = component_sampling[component_index];
2969 let actual_coefficients = flatten_integer_wavelet(&wavelet);
2970 precomputed_53[component_index] = Some(PrecomputedHtj2k53Component {
2971 x_rsiz,
2972 y_rsiz,
2973 dwt: j2k_dwt_from_integer_wavelet(&wavelet),
2974 });
2975
2976 if options.validate_against_float_reference {
2977 float_validation_actual.extend(actual_coefficients.clone());
2978 float_validation_expected.extend(float_reference_coefficients(
2979 component,
2980 decomposition_levels,
2981 scratch,
2982 )?);
2983 }
2984 if options.validate_against_integer_reference {
2985 integer_validation_actual.extend(actual_coefficients);
2986 integer_validation_expected.extend(integer_reference_coefficients(
2987 component,
2988 decomposition_levels,
2989 )?);
2990 }
2991 }
2992 }
2993
2994 let float_reference_metrics = if options.validate_against_float_reference {
2995 Some(error_metrics_i32(
2996 &float_validation_actual,
2997 &float_validation_expected,
2998 )?)
2999 } else {
3000 None
3001 };
3002 let integer_reference_metrics = if options.validate_against_integer_reference {
3003 Some(error_metrics_i32(
3004 &integer_validation_actual,
3005 &integer_validation_expected,
3006 )?)
3007 } else {
3008 None
3009 };
3010 let precomputed_components = precomputed_53
3011 .into_iter()
3012 .map(|component| {
3013 component.ok_or(JpegToHtj2kError::Validation(
3014 "integer transcode did not produce all components",
3015 ))
3016 })
3017 .collect::<Result<Vec<_>, _>>()?;
3018
3019 Ok(ComponentTranscodeBatch {
3020 precomputed_components: PrecomputedComponentBatch::Dwt53(precomputed_components),
3021 float_reference_metrics,
3022 integer_reference_metrics,
3023 })
3024}
3025
3026fn integer_wavelets_for_component_group(
3027 group: &[usize],
3028 components: &[JpegDctComponent],
3029 decomposition_levels: u8,
3030 scratch: &mut JpegToHtj2kScratch,
3031 accelerator: &mut impl DctToWaveletStageAccelerator,
3032 timings: &mut TranscodeTimingReport,
3033) -> Result<Vec<IntegerWavelet>, JpegToHtj2kError> {
3034 let jobs = group
3035 .iter()
3036 .map(|&component_index| integer_dct_job_for_component(&components[component_index]))
3037 .collect::<Result<Vec<_>, _>>()?;
3038 record_batch_attempt(timings, group.len());
3039 let accelerator_start = Instant::now();
3040 let accelerated_first_levels = accelerator
3041 .dct_grid_to_reversible_dwt53_batch(&jobs)
3042 .map_err(JpegToHtj2kError::Accelerator)?;
3043 timings.dct_to_wavelet_accelerator_us = timings
3044 .dct_to_wavelet_accelerator_us
3045 .saturating_add(accelerator_start.elapsed().as_micros());
3046
3047 if let Some(first_levels) = accelerated_first_levels {
3048 if first_levels.len() != group.len() {
3049 return Err(JpegToHtj2kError::Validation(
3050 "reversible 5/3 batch accelerator returned wrong component count",
3051 ));
3052 }
3053 timings.component_count = timings.component_count.saturating_add(group.len());
3054 record_accelerator_dispatch(timings, group.len());
3055 let decompose_start = Instant::now();
3056 let wavelets = first_levels
3057 .into_iter()
3058 .map(|first_level| integer_wavelet_from_first_level(first_level, decomposition_levels))
3059 .collect();
3060 timings.dwt_decompose_us = timings
3061 .dwt_decompose_us
3062 .saturating_add(decompose_start.elapsed().as_micros());
3063 return Ok(wavelets);
3064 }
3065
3066 group
3067 .iter()
3068 .map(|&component_index| {
3069 integer_direct_wavelet_from_component(
3070 &components[component_index],
3071 decomposition_levels,
3072 scratch,
3073 accelerator,
3074 timings,
3075 )
3076 })
3077 .collect()
3078}
3079
3080fn same_geometry_component_groups(components: &[JpegDctComponent]) -> Vec<Vec<usize>> {
3081 let mut assigned = vec![false; components.len()];
3082 let mut groups = Vec::new();
3083
3084 for component_index in 0..components.len() {
3085 if assigned[component_index] {
3086 continue;
3087 }
3088 assigned[component_index] = true;
3089 let mut group = vec![component_index];
3090 for candidate_index in component_index + 1..components.len() {
3091 if !assigned[candidate_index]
3092 && same_component_geometry(
3093 &components[component_index],
3094 &components[candidate_index],
3095 )
3096 {
3097 assigned[candidate_index] = true;
3098 group.push(candidate_index);
3099 }
3100 }
3101 groups.push(group);
3102 }
3103
3104 groups
3105}
3106
3107fn same_component_geometry(left: &JpegDctComponent, right: &JpegDctComponent) -> bool {
3108 left.width == right.width
3109 && left.height == right.height
3110 && left.block_cols == right.block_cols
3111 && left.block_rows == right.block_rows
3112}
3113
3114fn integer_dct_job_for_component(
3115 component: &JpegDctComponent,
3116) -> Result<DctGridToReversibleDwt53Job<'_>, JpegToHtj2kError> {
3117 validate_component_block_grid(component)?;
3118 Ok(DctGridToReversibleDwt53Job {
3119 dequantized_blocks: &component.dequantized_blocks,
3120 block_cols: component.block_cols as usize,
3121 block_rows: component.block_rows as usize,
3122 width: component.width as usize,
3123 height: component.height as usize,
3124 })
3125}
3126
3127#[allow(clippy::too_many_arguments)]
3128fn component_to_precomputed_htj2k(
3129 component: &JpegDctComponent,
3130 x_rsiz: u8,
3131 y_rsiz: u8,
3132 decomposition_levels: u8,
3133 options: &JpegToHtj2kOptions,
3134 scratch: &mut JpegToHtj2kScratch,
3135 accelerator: &mut impl DctToWaveletStageAccelerator,
3136 timings: &mut TranscodeTimingReport,
3137) -> Result<ComponentTranscodeResult, JpegToHtj2kError> {
3138 let (dwt, actual_coefficients) = match options.coefficient_path {
3139 JpegToHtj2kCoefficientPath::IntegerDirect53 => {
3140 let wavelet = integer_direct_wavelet_from_component(
3141 component,
3142 decomposition_levels,
3143 scratch,
3144 accelerator,
3145 timings,
3146 )?;
3147 (
3148 PrecomputedComponent::Dwt53(PrecomputedHtj2k53Component {
3149 x_rsiz,
3150 y_rsiz,
3151 dwt: j2k_dwt_from_integer_wavelet(&wavelet),
3152 }),
3153 flatten_integer_wavelet(&wavelet),
3154 )
3155 }
3156 JpegToHtj2kCoefficientPath::FloatDirectLinear53 => {
3157 let wavelet = float_direct_wavelet_from_component(
3158 component,
3159 decomposition_levels,
3160 scratch,
3161 accelerator,
3162 timings,
3163 )?;
3164 (
3165 PrecomputedComponent::Dwt53(PrecomputedHtj2k53Component {
3166 x_rsiz,
3167 y_rsiz,
3168 dwt: j2k_dwt_from_wavelet(
3169 &wavelet,
3170 component.width as usize,
3171 component.height as usize,
3172 ),
3173 }),
3174 rounded_wavelet_i32(&wavelet)?,
3175 )
3176 }
3177 JpegToHtj2kCoefficientPath::FloatDirectLinear97 => {
3178 let wavelet = float_direct_97_wavelet_from_component(
3179 component,
3180 decomposition_levels,
3181 scratch,
3182 accelerator,
3183 timings,
3184 )?;
3185 (
3186 PrecomputedComponent::Dwt97(PrecomputedHtj2k97Component {
3187 x_rsiz,
3188 y_rsiz,
3189 dwt: j2k_dwt97_from_wavelet(
3190 &wavelet,
3191 component.width as usize,
3192 component.height as usize,
3193 ),
3194 }),
3195 rounded_wavelet97_i32(&wavelet)?,
3196 )
3197 }
3198 };
3199 let float_validation_coefficients = if options.validate_against_float_reference {
3200 let expected = match options.coefficient_path {
3201 JpegToHtj2kCoefficientPath::FloatDirectLinear97 => {
3202 float97_reference_coefficients(component, decomposition_levels, scratch)?
3203 }
3204 JpegToHtj2kCoefficientPath::IntegerDirect53
3205 | JpegToHtj2kCoefficientPath::FloatDirectLinear53 => {
3206 float_reference_coefficients(component, decomposition_levels, scratch)?
3207 }
3208 };
3209 Some((actual_coefficients.clone(), expected))
3210 } else {
3211 None
3212 };
3213 let integer_validation_coefficients = if options.validate_against_integer_reference {
3214 let expected = integer_reference_coefficients(component, decomposition_levels)?;
3215 Some((actual_coefficients, expected))
3216 } else {
3217 None
3218 };
3219
3220 Ok(ComponentTranscodeResult {
3221 precomputed: dwt,
3222 float_validation_coefficients,
3223 integer_validation_coefficients,
3224 })
3225}
3226
3227fn transcode_path_name(
3228 all_unit_sampled: bool,
3229 coefficient_path: JpegToHtj2kCoefficientPath,
3230) -> &'static str {
3231 match (all_unit_sampled, coefficient_path) {
3232 (true, JpegToHtj2kCoefficientPath::IntegerDirect53) => {
3233 "full_resolution_components_integer_direct_53"
3234 }
3235 (false, JpegToHtj2kCoefficientPath::IntegerDirect53) => {
3236 "native_component_sampling_integer_direct_53"
3237 }
3238 (true, JpegToHtj2kCoefficientPath::FloatDirectLinear53) => {
3239 "full_resolution_components_float_direct_53"
3240 }
3241 (false, JpegToHtj2kCoefficientPath::FloatDirectLinear53) => {
3242 "native_component_sampling_float_direct_53"
3243 }
3244 (true, JpegToHtj2kCoefficientPath::FloatDirectLinear97) => {
3245 "full_resolution_components_float_direct_97"
3246 }
3247 (false, JpegToHtj2kCoefficientPath::FloatDirectLinear97) => {
3248 "native_component_sampling_float_direct_97"
3249 }
3250 }
3251}
3252
3253fn float_direct_wavelet_from_component(
3254 component: &JpegDctComponent,
3255 decomposition_levels: u8,
3256 scratch: &mut JpegToHtj2kScratch,
3257 accelerator: &mut impl DctToWaveletStageAccelerator,
3258 timings: &mut TranscodeTimingReport,
3259) -> Result<ComponentWavelet, JpegToHtj2kError> {
3260 timings.component_count = timings.component_count.saturating_add(1);
3261 let repack_start = Instant::now();
3262 dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3263 timings.jpeg_dct_repack_us = timings
3264 .jpeg_dct_repack_us
3265 .saturating_add(repack_start.elapsed().as_micros());
3266 let blocks = &scratch.dct_blocks_f64;
3267 let job = DctGridToDwt53Job {
3268 blocks,
3269 block_cols: component.block_cols as usize,
3270 block_rows: component.block_rows as usize,
3271 width: component.width as usize,
3272 height: component.height as usize,
3273 };
3274 record_accelerator_attempt(timings, 1);
3275 let accelerator_start = Instant::now();
3276 let accelerated = accelerator
3277 .dct_grid_to_dwt53(job)
3278 .map_err(JpegToHtj2kError::Accelerator)?;
3279 timings.dct_to_wavelet_accelerator_us = timings
3280 .dct_to_wavelet_accelerator_us
3281 .saturating_add(accelerator_start.elapsed().as_micros());
3282 let bands = if let Some(bands) = accelerated {
3283 record_accelerator_dispatch(timings, 1);
3284 bands
3285 } else {
3286 record_cpu_fallback(timings, 1);
3287 let fallback_start = Instant::now();
3288 let bands = dct8x8_blocks_to_dwt53_float_linear_with_scratch(
3289 blocks,
3290 component.block_cols as usize,
3291 component.block_rows as usize,
3292 component.width as usize,
3293 component.height as usize,
3294 &mut scratch.dct53_grid,
3295 )
3296 .map_err(dct53_grid_error)?;
3297 timings.dct_to_wavelet_cpu_fallback_us = timings
3298 .dct_to_wavelet_cpu_fallback_us
3299 .saturating_add(fallback_start.elapsed().as_micros());
3300 bands
3301 };
3302 let decompose_start = Instant::now();
3303 let wavelet = decompose_from_first_level(bands, usize::from(decomposition_levels));
3304 timings.dwt_decompose_us = timings
3305 .dwt_decompose_us
3306 .saturating_add(decompose_start.elapsed().as_micros());
3307 Ok(wavelet)
3308}
3309
3310fn float_direct_97_wavelet_from_component(
3311 component: &JpegDctComponent,
3312 decomposition_levels: u8,
3313 scratch: &mut JpegToHtj2kScratch,
3314 accelerator: &mut impl DctToWaveletStageAccelerator,
3315 timings: &mut TranscodeTimingReport,
3316) -> Result<ComponentWavelet97, JpegToHtj2kError> {
3317 timings.component_count = timings.component_count.saturating_add(1);
3318 let repack_start = Instant::now();
3319 dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3320 timings.jpeg_dct_repack_us = timings
3321 .jpeg_dct_repack_us
3322 .saturating_add(repack_start.elapsed().as_micros());
3323 let blocks = &scratch.dct_blocks_f64;
3324 let job = DctGridToDwt97Job {
3325 blocks,
3326 block_cols: component.block_cols as usize,
3327 block_rows: component.block_rows as usize,
3328 width: component.width as usize,
3329 height: component.height as usize,
3330 };
3331 record_accelerator_attempt(timings, 1);
3332 let accelerator_start = Instant::now();
3333 let accelerated = accelerator
3334 .dct_grid_to_dwt97(job)
3335 .map_err(JpegToHtj2kError::Accelerator)?;
3336 timings.dct_to_wavelet_accelerator_us = timings
3337 .dct_to_wavelet_accelerator_us
3338 .saturating_add(accelerator_start.elapsed().as_micros());
3339 let bands = if let Some(bands) = accelerated {
3340 record_accelerator_dispatch(timings, 1);
3341 bands
3342 } else {
3343 record_cpu_fallback(timings, 1);
3344 let fallback_start = Instant::now();
3345 let bands = dct8x8_blocks_then_dwt97_float_with_scratch(
3346 blocks,
3347 component.block_cols as usize,
3348 component.block_rows as usize,
3349 component.width as usize,
3350 component.height as usize,
3351 &mut scratch.dct97_grid,
3352 )
3353 .map_err(dct97_grid_error)?;
3354 timings.dct_to_wavelet_cpu_fallback_us = timings
3355 .dct_to_wavelet_cpu_fallback_us
3356 .saturating_add(fallback_start.elapsed().as_micros());
3357 bands
3358 };
3359 let decompose_start = Instant::now();
3360 let wavelet = decompose_97_from_first_level_with_scratch(
3361 bands,
3362 usize::from(decomposition_levels),
3363 &mut scratch.dct97_grid,
3364 );
3365 timings.dwt_decompose_us = timings
3366 .dwt_decompose_us
3367 .saturating_add(decompose_start.elapsed().as_micros());
3368 Ok(wavelet)
3369}
3370
3371fn float_reference_coefficients(
3372 component: &JpegDctComponent,
3373 decomposition_levels: u8,
3374 scratch: &mut JpegToHtj2kScratch,
3375) -> Result<Vec<i32>, JpegToHtj2kError> {
3376 dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3377 let blocks = &scratch.dct_blocks_f64;
3378 let first_reference_level = dct8x8_blocks_then_dwt53_float(
3379 blocks,
3380 component.block_cols as usize,
3381 component.block_rows as usize,
3382 component.width as usize,
3383 component.height as usize,
3384 )
3385 .map_err(dct53_grid_error)?;
3386 let reference =
3387 decompose_from_first_level(first_reference_level, usize::from(decomposition_levels));
3388 rounded_wavelet_i32(&reference)
3389}
3390
3391fn float97_reference_coefficients(
3392 component: &JpegDctComponent,
3393 decomposition_levels: u8,
3394 scratch: &mut JpegToHtj2kScratch,
3395) -> Result<Vec<i32>, JpegToHtj2kError> {
3396 dct_blocks_to_8x8_f64_into(&component.dequantized_blocks, &mut scratch.dct_blocks_f64);
3397 let blocks = &scratch.dct_blocks_f64;
3398 let first_reference_level = dct8x8_blocks_then_dwt97_float(
3399 blocks,
3400 component.block_cols as usize,
3401 component.block_rows as usize,
3402 component.width as usize,
3403 component.height as usize,
3404 )
3405 .map_err(dct97_grid_error)?;
3406 let reference =
3407 decompose_97_from_first_level(first_reference_level, usize::from(decomposition_levels));
3408 rounded_wavelet97_i32(&reference)
3409}
3410
3411fn decompose_from_first_level(
3412 first_level: Dwt53TwoDimensional<f64>,
3413 decomposition_levels: usize,
3414) -> ComponentWavelet {
3415 let mut wavelet = ComponentWavelet {
3416 final_ll: first_level.ll.clone(),
3417 final_ll_width: first_level.low_width,
3418 final_ll_height: first_level.low_height,
3419 levels: vec![first_level],
3420 };
3421
3422 while wavelet.levels.len() < decomposition_levels {
3423 let next = linearized_53_2d_from_plane(
3424 &wavelet.final_ll,
3425 wavelet.final_ll_width,
3426 wavelet.final_ll_height,
3427 );
3428 wavelet.final_ll.clone_from(&next.ll);
3429 wavelet.final_ll_width = next.low_width;
3430 wavelet.final_ll_height = next.low_height;
3431 wavelet.levels.push(next);
3432 }
3433
3434 wavelet
3435}
3436
3437fn decompose_97_from_first_level(
3438 first_level: Dwt97TwoDimensional<f64>,
3439 decomposition_levels: usize,
3440) -> ComponentWavelet97 {
3441 let mut scratch = Dct97GridScratch::default();
3442 decompose_97_from_first_level_with_scratch(first_level, decomposition_levels, &mut scratch)
3443}
3444
3445fn decompose_97_from_first_level_with_scratch(
3446 first_level: Dwt97TwoDimensional<f64>,
3447 decomposition_levels: usize,
3448 scratch: &mut Dct97GridScratch,
3449) -> ComponentWavelet97 {
3450 let mut wavelet = ComponentWavelet97 {
3451 final_ll: first_level.ll.clone(),
3452 final_ll_width: first_level.low_width,
3453 final_ll_height: first_level.low_height,
3454 levels: vec![first_level],
3455 };
3456
3457 while wavelet.levels.len() < decomposition_levels {
3458 let next = linearized_97_2d_from_plane_with_scratch(
3459 &wavelet.final_ll,
3460 wavelet.final_ll_width,
3461 wavelet.final_ll_height,
3462 scratch,
3463 );
3464 wavelet.final_ll.clone_from(&next.ll);
3465 wavelet.final_ll_width = next.low_width;
3466 wavelet.final_ll_height = next.low_height;
3467 wavelet.levels.push(next);
3468 }
3469
3470 wavelet
3471}
3472
3473fn j2k_dwt_from_wavelet(
3474 wavelet: &ComponentWavelet,
3475 width: usize,
3476 height: usize,
3477) -> J2kForwardDwt53Output {
3478 let mut current_width = width;
3479 let mut current_height = height;
3480 let mut levels = Vec::with_capacity(wavelet.levels.len());
3481
3482 for level in &wavelet.levels {
3483 levels.push(J2kForwardDwt53Level {
3484 hl: level.hl.iter().map(|&value| value as f32).collect(),
3485 lh: level.lh.iter().map(|&value| value as f32).collect(),
3486 hh: level.hh.iter().map(|&value| value as f32).collect(),
3487 width: current_width as u32,
3488 height: current_height as u32,
3489 low_width: level.low_width as u32,
3490 low_height: level.low_height as u32,
3491 high_width: level.high_width as u32,
3492 high_height: level.high_height as u32,
3493 });
3494 current_width = level.low_width;
3495 current_height = level.low_height;
3496 }
3497 levels.reverse();
3498
3499 J2kForwardDwt53Output {
3500 ll: wavelet.final_ll.iter().map(|&value| value as f32).collect(),
3501 ll_width: wavelet.final_ll_width as u32,
3502 ll_height: wavelet.final_ll_height as u32,
3503 levels,
3504 }
3505}
3506
3507fn j2k_dwt97_from_wavelet(
3508 wavelet: &ComponentWavelet97,
3509 width: usize,
3510 height: usize,
3511) -> J2kForwardDwt97Output {
3512 let mut current_width = width;
3513 let mut current_height = height;
3514 let mut levels = Vec::with_capacity(wavelet.levels.len());
3515
3516 for level in &wavelet.levels {
3517 levels.push(J2kForwardDwt97Level {
3518 hl: level.hl.iter().map(|&value| value as f32).collect(),
3519 lh: level.lh.iter().map(|&value| value as f32).collect(),
3520 hh: level.hh.iter().map(|&value| value as f32).collect(),
3521 width: current_width as u32,
3522 height: current_height as u32,
3523 low_width: level.low_width as u32,
3524 low_height: level.low_height as u32,
3525 high_width: level.high_width as u32,
3526 high_height: level.high_height as u32,
3527 });
3528 current_width = level.low_width;
3529 current_height = level.low_height;
3530 }
3531 levels.reverse();
3532
3533 J2kForwardDwt97Output {
3534 ll: wavelet.final_ll.iter().map(|&value| value as f32).collect(),
3535 ll_width: wavelet.final_ll_width as u32,
3536 ll_height: wavelet.final_ll_height as u32,
3537 levels,
3538 }
3539}
3540
3541fn j2k_dwt_from_integer_wavelet(wavelet: &IntegerWavelet) -> J2kForwardDwt53Output {
3542 let mut levels = Vec::with_capacity(wavelet.levels.len());
3543 for level in &wavelet.levels {
3544 levels.push(J2kForwardDwt53Level {
3545 hl: level.hl.iter().map(|&value| value as f32).collect(),
3546 lh: level.lh.iter().map(|&value| value as f32).collect(),
3547 hh: level.hh.iter().map(|&value| value as f32).collect(),
3548 width: level.width as u32,
3549 height: level.height as u32,
3550 low_width: level.low_width as u32,
3551 low_height: level.low_height as u32,
3552 high_width: level.high_width as u32,
3553 high_height: level.high_height as u32,
3554 });
3555 }
3556 levels.reverse();
3557
3558 J2kForwardDwt53Output {
3559 ll: wavelet.final_ll.iter().map(|&value| value as f32).collect(),
3560 ll_width: wavelet.final_ll_width as u32,
3561 ll_height: wavelet.final_ll_height as u32,
3562 levels,
3563 }
3564}
3565
3566fn rounded_wavelet_i32(wavelet: &ComponentWavelet) -> Result<Vec<i32>, JpegToHtj2kError> {
3567 let coefficient_count = wavelet.final_ll.len()
3568 + wavelet
3569 .levels
3570 .iter()
3571 .map(|level| level.hl.len() + level.lh.len() + level.hh.len())
3572 .sum::<usize>();
3573 let mut output = Vec::with_capacity(coefficient_count);
3574 append_rounded_i32(&wavelet.final_ll, &mut output)?;
3575 for level in wavelet.levels.iter().rev() {
3576 append_rounded_i32(&level.hl, &mut output)?;
3577 append_rounded_i32(&level.lh, &mut output)?;
3578 append_rounded_i32(&level.hh, &mut output)?;
3579 }
3580 Ok(output)
3581}
3582
3583fn rounded_wavelet97_i32(wavelet: &ComponentWavelet97) -> Result<Vec<i32>, JpegToHtj2kError> {
3584 let coefficient_count = wavelet.final_ll.len()
3585 + wavelet
3586 .levels
3587 .iter()
3588 .map(|level| level.hl.len() + level.lh.len() + level.hh.len())
3589 .sum::<usize>();
3590 let mut output = Vec::with_capacity(coefficient_count);
3591 append_rounded_i32(&wavelet.final_ll, &mut output)?;
3592 for level in wavelet.levels.iter().rev() {
3593 append_rounded_i32(&level.hl, &mut output)?;
3594 append_rounded_i32(&level.lh, &mut output)?;
3595 append_rounded_i32(&level.hh, &mut output)?;
3596 }
3597 Ok(output)
3598}
3599
3600fn integer_direct_wavelet_from_component(
3601 component: &JpegDctComponent,
3602 decomposition_levels: u8,
3603 scratch: &mut JpegToHtj2kScratch,
3604 accelerator: &mut impl DctToWaveletStageAccelerator,
3605 timings: &mut TranscodeTimingReport,
3606) -> Result<IntegerWavelet, JpegToHtj2kError> {
3607 let job = integer_dct_job_for_component(component)?;
3608 timings.component_count = timings.component_count.saturating_add(1);
3609 record_accelerator_attempt(timings, 1);
3610 let accelerator_start = Instant::now();
3611 let accelerated_first_level = accelerator
3612 .dct_grid_to_reversible_dwt53(job)
3613 .map_err(JpegToHtj2kError::Accelerator)?;
3614 timings.dct_to_wavelet_accelerator_us = timings
3615 .dct_to_wavelet_accelerator_us
3616 .saturating_add(accelerator_start.elapsed().as_micros());
3617 if let Some(first_level) = accelerated_first_level {
3618 record_accelerator_dispatch(timings, 1);
3619 let decompose_start = Instant::now();
3620 let wavelet = integer_wavelet_from_first_level(first_level, decomposition_levels);
3621 timings.dwt_decompose_us = timings
3622 .dwt_decompose_us
3623 .saturating_add(decompose_start.elapsed().as_micros());
3624 return Ok(wavelet);
3625 }
3626
3627 scratch.integer_idct_blocks.clear();
3628 scratch
3629 .integer_idct_blocks
3630 .resize_with(component.dequantized_blocks.len(), || None);
3631 record_cpu_fallback(timings, 1);
3632 let fallback_start = Instant::now();
3633 let (final_ll, final_ll_width, final_ll_height, first_level) =
3634 integer_direct_first_level_from_component(
3635 component,
3636 &mut scratch.integer_idct_blocks,
3637 &mut scratch.integer_row,
3638 )?;
3639 timings.dct_to_wavelet_cpu_fallback_us = timings
3640 .dct_to_wavelet_cpu_fallback_us
3641 .saturating_add(fallback_start.elapsed().as_micros());
3642 let decompose_start = Instant::now();
3643 let wavelet = integer_wavelet_from_first_parts(
3644 final_ll,
3645 final_ll_width,
3646 final_ll_height,
3647 first_level,
3648 decomposition_levels,
3649 );
3650 timings.dwt_decompose_us = timings
3651 .dwt_decompose_us
3652 .saturating_add(decompose_start.elapsed().as_micros());
3653 Ok(wavelet)
3654}
3655
3656fn integer_wavelet_from_first_level(
3657 first_level: ReversibleDwt53FirstLevel,
3658 decomposition_levels: u8,
3659) -> IntegerWavelet {
3660 let (final_ll, final_ll_width, final_ll_height, first_level) =
3661 integer_wavelet_first_level_from_accelerated(first_level);
3662 integer_wavelet_from_first_parts(
3663 final_ll,
3664 final_ll_width,
3665 final_ll_height,
3666 first_level,
3667 decomposition_levels,
3668 )
3669}
3670
3671fn integer_wavelet_from_first_parts(
3672 mut final_ll: Vec<i32>,
3673 mut final_ll_width: usize,
3674 mut final_ll_height: usize,
3675 first_level: IntegerWaveletLevel,
3676 decomposition_levels: u8,
3677) -> IntegerWavelet {
3678 let mut levels = vec![first_level];
3679
3680 let remaining_levels = usize::from(decomposition_levels.saturating_sub(1));
3681 if remaining_levels > 0 {
3682 let tail =
3683 reversible_dwt53_i32(final_ll, final_ll_width, final_ll_height, remaining_levels);
3684 final_ll = tail.final_ll;
3685 final_ll_width = tail.final_ll_width;
3686 final_ll_height = tail.final_ll_height;
3687 levels.extend(tail.levels);
3688 }
3689
3690 IntegerWavelet {
3691 final_ll,
3692 final_ll_width,
3693 final_ll_height,
3694 levels,
3695 }
3696}
3697
3698fn integer_wavelet_first_level_from_accelerated(
3699 first_level: ReversibleDwt53FirstLevel,
3700) -> (Vec<i32>, usize, usize, IntegerWaveletLevel) {
3701 let level = IntegerWaveletLevel {
3702 width: first_level.low_width + first_level.high_width,
3703 height: first_level.low_height + first_level.high_height,
3704 low_width: first_level.low_width,
3705 low_height: first_level.low_height,
3706 high_width: first_level.high_width,
3707 high_height: first_level.high_height,
3708 hl: first_level.hl,
3709 lh: first_level.lh,
3710 hh: first_level.hh,
3711 };
3712 (
3713 first_level.ll,
3714 first_level.low_width,
3715 first_level.low_height,
3716 level,
3717 )
3718}
3719
3720fn integer_direct_first_level_from_component(
3721 component: &JpegDctComponent,
3722 idct_blocks: &mut [Option<[i32; 64]>],
3723 row: &mut Vec<i32>,
3724) -> Result<(Vec<i32>, usize, usize, IntegerWaveletLevel), JpegToHtj2kError> {
3725 let width = component.width as usize;
3726 let height = component.height as usize;
3727 let low_width = width.div_ceil(2);
3728 let low_height = height.div_ceil(2);
3729 let high_width = width / 2;
3730 let high_height = height / 2;
3731
3732 let mut ll = Vec::with_capacity(low_width * low_height);
3733 let mut hl = Vec::with_capacity(high_width * low_height);
3734 let mut lh = Vec::with_capacity(low_width * high_height);
3735 let mut hh = Vec::with_capacity(high_width * high_height);
3736 row.clear();
3737 if row.capacity() < width {
3738 row.reserve(width - row.capacity());
3739 }
3740
3741 for output_y in 0..low_height {
3742 row.clear();
3743 for x in 0..width {
3744 row.push(vertical_53_i32_at(
3745 component,
3746 idct_blocks,
3747 x,
3748 output_y,
3749 true,
3750 )?);
3751 }
3752 reversible_lift_53_i32(row);
3753 ll.extend(row.iter().step_by(2).copied());
3754 hl.extend(row.iter().skip(1).step_by(2).copied());
3755 }
3756
3757 for output_y in 0..high_height {
3758 row.clear();
3759 for x in 0..width {
3760 row.push(vertical_53_i32_at(
3761 component,
3762 idct_blocks,
3763 x,
3764 output_y,
3765 false,
3766 )?);
3767 }
3768 reversible_lift_53_i32(row);
3769 lh.extend(row.iter().step_by(2).copied());
3770 hh.extend(row.iter().skip(1).step_by(2).copied());
3771 }
3772
3773 let level = IntegerWaveletLevel {
3774 width,
3775 height,
3776 low_width,
3777 low_height,
3778 high_width,
3779 high_height,
3780 hl,
3781 lh,
3782 hh,
3783 };
3784
3785 Ok((ll, low_width, low_height, level))
3786}
3787
3788fn vertical_53_i32_at(
3789 component: &JpegDctComponent,
3790 idct_blocks: &mut [Option<[i32; 64]>],
3791 x: usize,
3792 output_y: usize,
3793 low_pass: bool,
3794) -> Result<i32, JpegToHtj2kError> {
3795 if low_pass {
3796 vertical_low_53_i32_at(component, idct_blocks, x, output_y)
3797 } else {
3798 vertical_high_53_i32_at(component, idct_blocks, x, output_y)
3799 }
3800}
3801
3802fn vertical_low_53_i32_at(
3803 component: &JpegDctComponent,
3804 idct_blocks: &mut [Option<[i32; 64]>],
3805 x: usize,
3806 low_idx: usize,
3807) -> Result<i32, JpegToHtj2kError> {
3808 let height = component.height as usize;
3809 reversible_lift_53_low_at_fallible(height, low_idx, |y| {
3810 component_sample_i32(component, idct_blocks, x, y)
3811 })
3812}
3813
3814fn vertical_high_53_i32_at(
3815 component: &JpegDctComponent,
3816 idct_blocks: &mut [Option<[i32; 64]>],
3817 x: usize,
3818 high_idx: usize,
3819) -> Result<i32, JpegToHtj2kError> {
3820 let height = component.height as usize;
3821 reversible_lift_53_high_at_fallible(height, high_idx, |y| {
3822 component_sample_i32(component, idct_blocks, x, y)
3823 })
3824}
3825
3826fn component_sample_i32(
3827 component: &JpegDctComponent,
3828 idct_blocks: &mut [Option<[i32; 64]>],
3829 x: usize,
3830 y: usize,
3831) -> Result<i32, JpegToHtj2kError> {
3832 if x >= component.width as usize || y >= component.height as usize {
3833 return Err(JpegToHtj2kError::Validation(
3834 "component sample coordinate exceeds dimensions",
3835 ));
3836 }
3837 let block_cols = component.block_cols as usize;
3838 let block_x = x / 8;
3839 let block_y = y / 8;
3840 let block_idx = block_y * block_cols + block_x;
3841 let block = component
3842 .dequantized_blocks
3843 .get(block_idx)
3844 .ok_or(JpegToHtj2kError::Validation(
3845 "component block grid does not cover requested sample",
3846 ))?;
3847 let cached = idct_blocks
3848 .get_mut(block_idx)
3849 .ok_or(JpegToHtj2kError::Validation(
3850 "integer IDCT cache does not cover requested block",
3851 ))?;
3852 let block_samples = cached.get_or_insert_with(|| {
3853 let decoded = idct_islow_block(block);
3854 decoded.map(|sample| i32::from(sample) - 128)
3855 });
3856 let local_idx = (y % 8) * 8 + (x % 8);
3857 Ok(block_samples[local_idx])
3858}
3859
3860fn integer_reference_coefficients(
3861 component: &JpegDctComponent,
3862 decomposition_levels: u8,
3863) -> Result<Vec<i32>, JpegToHtj2kError> {
3864 let samples = idct_component_samples_i32(component)?;
3865 let wavelet = reversible_dwt53_i32(
3866 samples,
3867 component.width as usize,
3868 component.height as usize,
3869 usize::from(decomposition_levels),
3870 );
3871 Ok(flatten_integer_wavelet(&wavelet))
3872}
3873
3874fn idct_component_samples_i32(component: &JpegDctComponent) -> Result<Vec<i32>, JpegToHtj2kError> {
3875 validate_component_block_grid(component)?;
3876
3877 let width = component.width as usize;
3878 let height = component.height as usize;
3879 let block_cols = component.block_cols as usize;
3880 let block_rows = component.block_rows as usize;
3881 let mut samples = vec![0; width * height];
3882 for block_y in 0..block_rows {
3883 for block_x in 0..block_cols {
3884 let block = &component.dequantized_blocks[block_y * block_cols + block_x];
3885 let block_samples = idct_islow_block(block);
3886 for local_y in 0..8 {
3887 let y = block_y * 8 + local_y;
3888 if y >= height {
3889 continue;
3890 }
3891 for local_x in 0..8 {
3892 let x = block_x * 8 + local_x;
3893 if x >= width {
3894 continue;
3895 }
3896 samples[y * width + x] = i32::from(block_samples[local_y * 8 + local_x]) - 128;
3897 }
3898 }
3899 }
3900 }
3901
3902 Ok(samples)
3903}
3904
3905fn validate_component_block_grid(component: &JpegDctComponent) -> Result<(), JpegToHtj2kError> {
3906 let block_cols = component.block_cols as usize;
3907 let block_rows = component.block_rows as usize;
3908 let expected_blocks =
3909 block_cols
3910 .checked_mul(block_rows)
3911 .ok_or(JpegToHtj2kError::Validation(
3912 "component block grid overflow",
3913 ))?;
3914 if component.dequantized_blocks.len() != expected_blocks {
3915 return Err(JpegToHtj2kError::Validation(
3916 "component block count does not match block grid",
3917 ));
3918 }
3919
3920 Ok(())
3921}
3922
3923fn reversible_dwt53_i32(
3924 mut buffer: Vec<i32>,
3925 width: usize,
3926 height: usize,
3927 decomposition_levels: usize,
3928) -> IntegerWavelet {
3929 let mut current_width = width;
3930 let mut current_height = height;
3931 let mut levels = Vec::with_capacity(decomposition_levels);
3932
3933 for _ in 0..decomposition_levels {
3934 for x in 0..current_width {
3935 let mut column = Vec::with_capacity(current_height);
3936 for y in 0..current_height {
3937 column.push(buffer[y * width + x]);
3938 }
3939 reversible_lift_53_i32(&mut column);
3940 let low_len = current_height.div_ceil(2);
3941 for (idx, value) in column.iter().step_by(2).copied().enumerate() {
3942 buffer[idx * width + x] = value;
3943 }
3944 for (idx, value) in column.iter().skip(1).step_by(2).copied().enumerate() {
3945 buffer[(low_len + idx) * width + x] = value;
3946 }
3947 }
3948
3949 for y in 0..current_height {
3950 let row_start = y * width;
3951 let mut row = buffer[row_start..row_start + current_width].to_vec();
3952 reversible_lift_53_i32(&mut row);
3953 let low_len = current_width.div_ceil(2);
3954 for (idx, value) in row.iter().step_by(2).copied().enumerate() {
3955 buffer[row_start + idx] = value;
3956 }
3957 for (idx, value) in row.iter().skip(1).step_by(2).copied().enumerate() {
3958 buffer[row_start + low_len + idx] = value;
3959 }
3960 }
3961
3962 let low_width = current_width.div_ceil(2);
3963 let low_height = current_height.div_ceil(2);
3964 let high_width = current_width / 2;
3965 let high_height = current_height / 2;
3966 let mut hl = Vec::with_capacity(high_width * low_height);
3967 let mut lh = Vec::with_capacity(low_width * high_height);
3968 let mut hh = Vec::with_capacity(high_width * high_height);
3969
3970 for y in 0..low_height {
3971 for x in 0..high_width {
3972 hl.push(buffer[y * width + low_width + x]);
3973 }
3974 }
3975 for y in 0..high_height {
3976 for x in 0..low_width {
3977 lh.push(buffer[(low_height + y) * width + x]);
3978 }
3979 }
3980 for y in 0..high_height {
3981 for x in 0..high_width {
3982 hh.push(buffer[(low_height + y) * width + low_width + x]);
3983 }
3984 }
3985
3986 levels.push(IntegerWaveletLevel {
3987 width: current_width,
3988 height: current_height,
3989 low_width,
3990 low_height,
3991 high_width,
3992 high_height,
3993 hl,
3994 lh,
3995 hh,
3996 });
3997 current_width = low_width;
3998 current_height = low_height;
3999 }
4000
4001 let mut final_ll = Vec::with_capacity(current_width * current_height);
4002 for y in 0..current_height {
4003 for x in 0..current_width {
4004 final_ll.push(buffer[y * width + x]);
4005 }
4006 }
4007
4008 IntegerWavelet {
4009 final_ll,
4010 final_ll_width: current_width,
4011 final_ll_height: current_height,
4012 levels,
4013 }
4014}
4015
4016fn flatten_integer_wavelet(wavelet: &IntegerWavelet) -> Vec<i32> {
4017 let coefficient_count = wavelet.final_ll.len()
4018 + wavelet
4019 .levels
4020 .iter()
4021 .map(|level| level.hl.len() + level.lh.len() + level.hh.len())
4022 .sum::<usize>();
4023 let mut output = Vec::with_capacity(coefficient_count);
4024 output.extend_from_slice(&wavelet.final_ll);
4025 for level in wavelet.levels.iter().rev() {
4026 output.extend_from_slice(&level.hl);
4027 output.extend_from_slice(&level.lh);
4028 output.extend_from_slice(&level.hh);
4029 }
4030 output
4031}
4032
4033fn append_rounded_i32(values: &[f64], output: &mut Vec<i32>) -> Result<(), JpegToHtj2kError> {
4034 for &value in values {
4035 output.push(round_f64_to_i32(value)?);
4036 }
4037 Ok(())
4038}
4039
4040fn round_f64_to_i32(value: f64) -> Result<i32, JpegToHtj2kError> {
4041 let rounded = value.round();
4042 if !rounded.is_finite() {
4043 return Err(JpegToHtj2kError::Validation(
4044 "float reference coefficient is not finite",
4045 ));
4046 }
4047 if rounded < f64::from(i32::MIN) || rounded > f64::from(i32::MAX) {
4048 return Err(JpegToHtj2kError::Validation(
4049 "float reference coefficient exceeds i32 range",
4050 ));
4051 }
4052 Ok(rounded as i32)
4053}
4054
4055fn decomposition_levels_for_components(
4056 components: &[JpegDctComponent],
4057 requested_levels: u8,
4058) -> Result<u8, JpegToHtj2kError> {
4059 if requested_levels == 0 {
4060 return Err(JpegToHtj2kError::Unsupported(
4061 "jpeg_to_htj2k requires at least one decomposition level",
4062 ));
4063 }
4064
4065 let available_levels = components
4066 .iter()
4067 .map(|component| available_decomposition_levels(component.width, component.height))
4068 .min()
4069 .ok_or(JpegToHtj2kError::Unsupported("missing JPEG components"))?;
4070 let decomposition_levels = requested_levels.min(available_levels);
4071 if decomposition_levels == 0 {
4072 return Err(JpegToHtj2kError::Unsupported(
4073 "component dimensions are too small for a DWT decomposition",
4074 ));
4075 }
4076
4077 Ok(decomposition_levels)
4078}
4079
/// Returns `floor(log2(min(width, height)))`, or `0` when the smaller
/// dimension is `0` or `1`.
fn available_decomposition_levels(width: u32, height: u32) -> u8 {
    match width.min(height) {
        // `ilog2` panics on zero, and a single sample cannot be split further.
        0 | 1 => 0,
        // `ilog2` of a `u32` is at most 31, so the narrowing cast is lossless.
        smaller_side => smaller_side.ilog2() as u8,
    }
}
4088
4089fn component_sampling_for_jpeg(
4090 components: &[JpegDctComponent],
4091 reference_width: u32,
4092 reference_height: u32,
4093) -> Result<Vec<(u8, u8)>, JpegToHtj2kError> {
4094 let max_h = components
4095 .iter()
4096 .map(|component| component.h_samp)
4097 .max()
4098 .ok_or(JpegToHtj2kError::Unsupported("missing JPEG components"))?;
4099 let max_v = components
4100 .iter()
4101 .map(|component| component.v_samp)
4102 .max()
4103 .ok_or(JpegToHtj2kError::Unsupported("missing JPEG components"))?;
4104
4105 components
4106 .iter()
4107 .map(|component| {
4108 if component.h_samp == 0 || component.v_samp == 0 {
4109 return Err(JpegToHtj2kError::Unsupported(
4110 "JPEG component sampling factors must be non-zero",
4111 ));
4112 }
4113 if max_h % component.h_samp != 0 || max_v % component.v_samp != 0 {
4114 return Err(JpegToHtj2kError::Unsupported(
4115 "fractional JPEG component sampling is not supported",
4116 ));
4117 }
4118
4119 let x_rsiz = max_h / component.h_samp;
4120 let y_rsiz = max_v / component.v_samp;
4121 let expected_width = reference_width.div_ceil(u32::from(x_rsiz));
4122 let expected_height = reference_height.div_ceil(u32::from(y_rsiz));
4123 if component.width != expected_width || component.height != expected_height {
4124 return Err(JpegToHtj2kError::Unsupported(
4125 "JPEG component dimensions do not match derived SIZ sampling",
4126 ));
4127 }
4128
4129 Ok((x_rsiz, y_rsiz))
4130 })
4131 .collect()
4132}
4133
/// Replaces the contents of `output` with `blocks` converted to 8x8 `f64` grids.
///
/// Each 64-element block is read row-major: element `idx` lands at
/// `[idx / 8][idx % 8]`. Any previous contents of `output` are discarded, but
/// its allocation is kept for reuse.
fn dct_blocks_to_8x8_f64_into(blocks: &[[i16; 64]], output: &mut Vec<[[f64; 8]; 8]>) {
    output.clear();
    output.reserve(blocks.len());
    output.extend(blocks.iter().map(|block| -> [[f64; 8]; 8] {
        std::array::from_fn(|row| std::array::from_fn(|col| f64::from(block[row * 8 + col])))
    }));
}
4145
4146fn dct_blocks_to_8x8_f64(blocks: &[[i16; 64]]) -> Vec<[[f64; 8]; 8]> {
4147 let mut output = Vec::with_capacity(blocks.len());
4148 dct_blocks_to_8x8_f64_into(blocks, &mut output);
4149 output
4150}
4151
4152#[cfg(test)]
4153mod tests {
4154 use super::*;
4155 use crate::accelerator::{
4156 DctGridI16ToHtj2k97CodeBlockBatch, PreencodedHtj2k97CodeBlock,
4157 PreencodedHtj2k97CompactCodeBlock, PreencodedHtj2k97CompactComponent,
4158 PreencodedHtj2k97CompactResolution, PreencodedHtj2k97CompactSubband,
4159 PreencodedHtj2k97Resolution, PreencodedHtj2k97Subband,
4160 };
4161 use j2k::adapter::encode_stage::{EncodedHtJ2kCodeBlock, J2kHtCodeBlockEncodeJob};
4162 use j2k_jpeg::transcode::JpegDctCodingMode;
4163 use j2k_jpeg::ColorSpace;
4164
4165 #[test]
4166 fn timing_report_add_assign_saturates_and_adds_all_counter_kinds() {
4167 let mut report = TranscodeTimingReport {
4168 source_raw_probe_us: u128::MAX - 1,
4169 dwt97_batch_ht_codeblock_dispatches: usize::MAX - 1,
4170 tile_count: 2,
4171 accelerator_jobs: 3,
4172 cpu_fallback_jobs: 4,
4173 ..TranscodeTimingReport::default()
4174 };
4175 report.add_assign(TranscodeTimingReport {
4176 source_raw_probe_us: 10,
4177 dwt97_batch_ht_codeblock_dispatches: 10,
4178 tile_count: 5,
4179 accelerator_jobs: 7,
4180 cpu_fallback_jobs: 11,
4181 ..TranscodeTimingReport::default()
4182 });
4183
4184 assert_eq!(report.source_raw_probe_us, u128::MAX);
4185 assert_eq!(report.dwt97_batch_ht_codeblock_dispatches, usize::MAX);
4186 assert_eq!(report.tile_count, 7);
4187 assert_eq!(report.accelerator_jobs, 10);
4188 assert_eq!(report.cpu_fallback_jobs, 15);
4189 }
4190
4191 #[derive(Default)]
4192 struct GroupedI16Accelerator {
4193 grouped_calls: usize,
4194 single_calls: usize,
4195 grouped_lengths: Vec<Vec<usize>>,
4196 }
4197
4198 impl DctToWaveletStageAccelerator for GroupedI16Accelerator {
4199 fn supports_htj2k97_i16_preencoded_batch(&self) -> bool {
4200 true
4201 }
4202
4203 fn dct_grid_i16_to_htj2k97_preencoded_batch(
4204 &mut self,
4205 jobs: &[DctGridI16ToHtj2k97CodeBlockJob<'_>],
4206 _options: Htj2k97CodeBlockOptions,
4207 ) -> Result<Option<Vec<PreencodedHtj2k97Component>>, TranscodeStageError> {
4208 self.single_calls = self.single_calls.saturating_add(1);
4209 Ok(Some(
4210 jobs.iter()
4211 .map(|job| dummy_preencoded_component(job.x_rsiz, job.y_rsiz))
4212 .collect(),
4213 ))
4214 }
4215
4216 fn dct_grid_i16_to_htj2k97_preencoded_batch_groups(
4217 &mut self,
4218 groups: &[DctGridI16ToHtj2k97CodeBlockBatch<'_, '_>],
4219 _options: Htj2k97CodeBlockOptions,
4220 ) -> Result<Option<Vec<Vec<PreencodedHtj2k97Component>>>, TranscodeStageError> {
4221 self.grouped_calls = self.grouped_calls.saturating_add(1);
4222 self.grouped_lengths
4223 .push(groups.iter().map(|group| group.jobs.len()).collect());
4224 Ok(Some(
4225 groups
4226 .iter()
4227 .map(|group| {
4228 group
4229 .jobs
4230 .iter()
4231 .map(|job| dummy_preencoded_component(job.x_rsiz, job.y_rsiz))
4232 .collect()
4233 })
4234 .collect(),
4235 ))
4236 }
4237 }
4238
4239 #[test]
4240 fn float97_batch_offers_i16_preencoded_geometry_groups_together() {
4241 let mut tiles = vec![test_float97_tile()];
4242 let options = JpegToHtj2kOptions::lossy_97();
4243 let mut scratch = JpegToHtj2kScratch::default();
4244 let mut accelerator = GroupedI16Accelerator::default();
4245 let mut timings = TranscodeTimingReport::default();
4246
4247 let (batch_count, job_count) = transform_float97_batch_tiles(
4248 &mut tiles,
4249 &options,
4250 &mut scratch,
4251 &mut accelerator,
4252 &mut timings,
4253 )
4254 .expect("grouped i16 preencoded transform");
4255
4256 assert_eq!(batch_count, 2);
4257 assert_eq!(job_count, 3);
4258 assert_eq!(accelerator.grouped_calls, 1);
4259 assert_eq!(accelerator.single_calls, 0);
4260 assert_eq!(accelerator.grouped_lengths, vec![vec![1, 2]]);
4261 assert!(tiles[0].preencoded_components.iter().all(Option::is_some));
4262 }
4263
4264 #[derive(Default)]
4265 struct CountingHtBatchEncodeAccelerator {
4266 batches: usize,
4267 jobs: usize,
4268 single_blocks: usize,
4269 }
4270
4271 impl J2kEncodeStageAccelerator for CountingHtBatchEncodeAccelerator {
4272 fn encode_ht_code_blocks(
4273 &mut self,
4274 jobs: &[J2kHtCodeBlockEncodeJob<'_>],
4275 ) -> Result<Option<Vec<EncodedHtJ2kCodeBlock>>, &'static str> {
4276 self.batches = self.batches.saturating_add(1);
4277 self.jobs = self.jobs.saturating_add(jobs.len());
4278 Ok(None)
4279 }
4280
4281 fn encode_ht_code_block(
4282 &mut self,
4283 _job: J2kHtCodeBlockEncodeJob<'_>,
4284 ) -> Result<Option<EncodedHtJ2kCodeBlock>, &'static str> {
4285 self.single_blocks = self.single_blocks.saturating_add(1);
4286 Ok(None)
4287 }
4288 }
4289
4290 #[test]
4291 fn float97_precomputed_prepared_tiles_offer_all_tiles_to_one_ht_batch() {
4292 let tiles = vec![
4293 test_float97_precomputed_tile(0),
4294 test_float97_precomputed_tile(1),
4295 ];
4296 let mut options = JpegToHtj2kOptions::lossy_97();
4297 options.encode_options.code_block_width_exp = 2;
4298 options.encode_options.code_block_height_exp = 2;
4299 let mut accelerator = CountingHtBatchEncodeAccelerator::default();
4300
4301 let encoded_tiles = encode_float97_prepared_tiles(tiles, &options, &mut accelerator);
4302
4303 assert_eq!(encoded_tiles.len(), 2);
4304 for (expected_tile_index, (actual_tile_index, encoded)) in
4305 encoded_tiles.into_iter().enumerate()
4306 {
4307 assert_eq!(actual_tile_index, expected_tile_index);
4308 let encoded = encoded.expect("precomputed batch tile encodes");
4309 assert!(encoded.codestream.starts_with(&[0xff, 0x4f]));
4310 }
4311 assert_eq!(accelerator.batches, 1);
4312 assert!(accelerator.jobs > 0);
4313 assert_eq!(accelerator.single_blocks, accelerator.jobs);
4314 }
4315
4316 #[test]
4317 fn compact_preencoded_component_storage_rebases_ranges_into_tile_payload() {
4318 let mut tile = test_float97_tile();
4319 let batch_payload = vec![1, 2, 3, 4, 5, 6];
4320 let component = PreencodedHtj2k97CompactComponent {
4321 x_rsiz: 1,
4322 y_rsiz: 1,
4323 resolutions: vec![PreencodedHtj2k97CompactResolution {
4324 subbands: vec![PreencodedHtj2k97CompactSubband {
4325 sub_band_type: crate::accelerator::J2kSubBandType::LowLow,
4326 num_cbs_x: 2,
4327 num_cbs_y: 1,
4328 total_bitplanes: 1,
4329 code_blocks: vec![
4330 PreencodedHtj2k97CompactCodeBlock {
4331 width: 1,
4332 height: 1,
4333 payload_range: 1..3,
4334 cleanup_length: 2,
4335 refinement_length: 0,
4336 num_coding_passes: 1,
4337 num_zero_bitplanes: 0,
4338 },
4339 PreencodedHtj2k97CompactCodeBlock {
4340 width: 1,
4341 height: 1,
4342 payload_range: 3..6,
4343 cleanup_length: 3,
4344 refinement_length: 0,
4345 num_coding_passes: 1,
4346 num_zero_bitplanes: 0,
4347 },
4348 ],
4349 }],
4350 }],
4351 };
4352
4353 store_compact_preencoded_component(&mut tile, 1, &batch_payload, component)
4354 .expect("compact component storage");
4355
4356 let stored = tile.preencoded_compact_components[1]
4357 .as_ref()
4358 .expect("stored compact component");
4359 assert_eq!(tile.preencoded_compact_payload, vec![2, 3, 4, 5, 6]);
4360 assert_eq!(
4361 stored.resolutions[0].subbands[0].code_blocks[0].payload_range,
4362 0..2
4363 );
4364 assert_eq!(
4365 stored.resolutions[0].subbands[0].code_blocks[1].payload_range,
4366 2..5
4367 );
4368 }
4369
4370 fn test_float97_tile() -> Float97BatchTile {
4371 let components = vec![
4372 test_component(0, 16, 16, 2, 2),
4373 test_component(1, 8, 8, 1, 1),
4374 test_component(2, 8, 8, 1, 1),
4375 ];
4376 Float97BatchTile {
4377 tile_index: 0,
4378 jpeg: JpegDctImage {
4379 width: 16,
4380 height: 16,
4381 color_space: ColorSpace::YCbCr,
4382 coding_mode: JpegDctCodingMode::BaselineSequential,
4383 scan_count: 1,
4384 components,
4385 restart_index: None,
4386 },
4387 component_sampling: vec![(1, 1), (2, 2), (2, 2)],
4388 decomposition_levels: 1,
4389 all_unit_sampled: false,
4390 component_reports: Vec::new(),
4391 precomputed_components: vec![None, None, None],
4392 preencoded_compact_payload: Vec::new(),
4393 preencoded_compact_components: vec![None, None, None],
4394 preencoded_components: vec![None, None, None],
4395 prequantized_components: vec![None, None, None],
4396 float_validation_actual: Vec::new(),
4397 float_validation_expected: Vec::new(),
4398 timings: TranscodeTimingReport::default(),
4399 }
4400 }
4401
4402 fn test_float97_precomputed_tile(tile_index: usize) -> Float97BatchTile {
4403 let width = 17;
4404 let height = 13;
4405 let component = test_component(0, width, height, 1, 1);
4406 Float97BatchTile {
4407 tile_index,
4408 jpeg: JpegDctImage {
4409 width,
4410 height,
4411 color_space: ColorSpace::Grayscale,
4412 coding_mode: JpegDctCodingMode::BaselineSequential,
4413 scan_count: 1,
4414 components: vec![component],
4415 restart_index: None,
4416 },
4417 component_sampling: vec![(1, 1)],
4418 decomposition_levels: 1,
4419 all_unit_sampled: true,
4420 component_reports: vec![TranscodeComponentReport {
4421 component_index: 0,
4422 width,
4423 height,
4424 block_cols: width.div_ceil(8),
4425 block_rows: height.div_ceil(8),
4426 x_rsiz: 1,
4427 y_rsiz: 1,
4428 }],
4429 precomputed_components: vec![Some(dummy_precomputed_component(1, 1, width, height))],
4430 preencoded_compact_payload: Vec::new(),
4431 preencoded_compact_components: vec![None],
4432 preencoded_components: vec![None],
4433 prequantized_components: vec![None],
4434 float_validation_actual: Vec::new(),
4435 float_validation_expected: Vec::new(),
4436 timings: TranscodeTimingReport::default(),
4437 }
4438 }
4439
4440 fn test_component(
4441 component_index: usize,
4442 width: u32,
4443 height: u32,
4444 h_samp: u8,
4445 v_samp: u8,
4446 ) -> JpegDctComponent {
4447 let block_cols = width.div_ceil(8);
4448 let block_rows = height.div_ceil(8);
4449 let block_count = (block_cols * block_rows) as usize;
4450 JpegDctComponent {
4451 component_index,
4452 width,
4453 height,
4454 h_samp,
4455 v_samp,
4456 block_cols,
4457 block_rows,
4458 quant_table: [1u16; 64],
4459 quantized_blocks: vec![[0i16; 64]; block_count],
4460 dequantized_blocks: vec![[0i16; 64]; block_count],
4461 }
4462 }
4463
4464 fn dummy_precomputed_component(
4465 x_rsiz: u8,
4466 y_rsiz: u8,
4467 width: u32,
4468 height: u32,
4469 ) -> PrecomputedHtj2k97Component {
4470 let low_width = width.div_ceil(2);
4471 let low_height = height.div_ceil(2);
4472 let high_width = width / 2;
4473 let high_height = height / 2;
4474 PrecomputedHtj2k97Component {
4475 x_rsiz,
4476 y_rsiz,
4477 dwt: J2kForwardDwt97Output {
4478 ll: sample_f32_coefficients(low_width * low_height, 0.25),
4479 ll_width: low_width,
4480 ll_height: low_height,
4481 levels: vec![J2kForwardDwt97Level {
4482 hl: sample_f32_coefficients(high_width * low_height, -0.75),
4483 lh: sample_f32_coefficients(low_width * high_height, 1.25),
4484 hh: sample_f32_coefficients(high_width * high_height, -1.5),
4485 width,
4486 height,
4487 low_width,
4488 low_height,
4489 high_width,
4490 high_height,
4491 }],
4492 },
4493 }
4494 }
4495
4496 fn sample_f32_coefficients(count: u32, seed: f32) -> Vec<f32> {
4497 (0..count)
4498 .map(|idx| seed + (idx as f32).sin() * 0.125)
4499 .collect()
4500 }
4501
4502 fn dummy_preencoded_component(x_rsiz: u8, y_rsiz: u8) -> PreencodedHtj2k97Component {
4503 PreencodedHtj2k97Component {
4504 x_rsiz,
4505 y_rsiz,
4506 resolutions: vec![PreencodedHtj2k97Resolution {
4507 subbands: vec![PreencodedHtj2k97Subband {
4508 sub_band_type: crate::accelerator::J2kSubBandType::LowLow,
4509 num_cbs_x: 1,
4510 num_cbs_y: 1,
4511 total_bitplanes: 1,
4512 code_blocks: vec![PreencodedHtj2k97CodeBlock {
4513 width: 1,
4514 height: 1,
4515 encoded: EncodedHtJ2kCodeBlock {
4516 data: Vec::new(),
4517 cleanup_length: 0,
4518 refinement_length: 0,
4519 num_coding_passes: 0,
4520 num_zero_bitplanes: 1,
4521 },
4522 }],
4523 }],
4524 }],
4525 }
4526 }
4527}