pub struct CudaEncodeStageAccelerator { /* private fields */ }
CUDA implementation of selected JPEG 2000 encode stages.
Implementations§
Source§impl CudaEncodeStageAccelerator
impl CudaEncodeStageAccelerator
Sourcepub fn with_profile_collection(collect_profile: bool) -> Self
pub fn with_profile_collection(collect_profile: bool) -> Self
Create an encode-stage accelerator with optional CUDA stage timing collection.
Sourcepub fn for_auto_host_output() -> Self
pub fn for_auto_host_output() -> Self
Create the measured Auto route for host-output HTJ2K encode.
CUDA keeps the DWT and HT code-block stages, while forward RCT and Tier-2 packetization stay on the CPU for the current host-pixel path.
Sourcepub fn prefer_cpu_forward_rct(self, prefer_cpu_forward_rct: bool) -> Self
pub fn prefer_cpu_forward_rct(self, prefer_cpu_forward_rct: bool) -> Self
Prefer scalar CPU forward RCT while keeping later CUDA stages enabled.
Sourcepub fn prefer_cpu_packetization(self, prefer_cpu_packetization: bool) -> Self
pub fn prefer_cpu_packetization(self, prefer_cpu_packetization: bool) -> Self
Prefer scalar CPU Tier-2 packetization while keeping CUDA Tier-1/HT block coding enabled.
This is useful for batches of many small tiles where launching a CUDA packetization kernel and copying several tiny descriptor buffers per tile costs more than forming the packet body on the host.
Sourcepub fn prefer_cpu_ht_subband(self, prefer_cpu_ht_subband: bool) -> Self
pub fn prefer_cpu_ht_subband(self, prefer_cpu_ht_subband: bool) -> Self
Prefer host sub-band quantization while keeping batched CUDA HT code-block encode enabled.
This avoids launching one CUDA quantize/subband path for every prepared subband in multi-resolution precomputed transcode outputs, where the many tiny launches cost more than CPU quantization.
Sourcepub fn prefer_cpu_quantize_subband(
self,
prefer_cpu_quantize_subband: bool,
) -> Self
pub fn prefer_cpu_quantize_subband( self, prefer_cpu_quantize_subband: bool, ) -> Self
Prefer host sub-band quantization while keeping CUDA HT code-block encode enabled.
Multi-resolution transcode workloads can contain thousands of small subbands; for those, CPU quantization plus one batched HT code-block encode per tile is currently faster than launching CUDA quantization for every subband.
Sourcepub const fn collected_stage_timings(&self) -> CudaEncodeStageTimings
pub const fn collected_stage_timings(&self) -> CudaEncodeStageTimings
Return cumulative CUDA encode stage timings collected by this accelerator.
Sourcepub fn reset_collected_stage_timings(&mut self)
pub fn reset_collected_stage_timings(&mut self)
Clear cumulative CUDA encode stage timings without changing dispatch counters.
Sourcepub fn deinterleave_attempts(&self) -> usize
pub fn deinterleave_attempts(&self) -> usize
Number of deinterleave attempts observed.
Sourcepub fn forward_rct_attempts(&self) -> usize
pub fn forward_rct_attempts(&self) -> usize
Number of forward RCT attempts observed.
Sourcepub fn forward_ict_attempts(&self) -> usize
pub fn forward_ict_attempts(&self) -> usize
Number of forward ICT attempts observed.
Sourcepub fn forward_dwt53_attempts(&self) -> usize
pub fn forward_dwt53_attempts(&self) -> usize
Number of forward 5/3 DWT attempts observed.
Sourcepub fn forward_dwt97_attempts(&self) -> usize
pub fn forward_dwt97_attempts(&self) -> usize
Number of forward 9/7 DWT attempts observed.
Sourcepub fn quantize_subband_attempts(&self) -> usize
pub fn quantize_subband_attempts(&self) -> usize
Number of sub-band quantization attempts observed.
Sourcepub fn tier1_code_block_attempts(&self) -> usize
pub fn tier1_code_block_attempts(&self) -> usize
Number of classic Tier-1 code-block attempts observed.
Sourcepub fn ht_code_block_attempts(&self) -> usize
pub fn ht_code_block_attempts(&self) -> usize
Number of HT code-block attempts observed.
Sourcepub fn packetization_attempts(&self) -> usize
pub fn packetization_attempts(&self) -> usize
Number of packetization attempts observed.
Sourcepub fn deinterleave_dispatches(&self) -> usize
pub fn deinterleave_dispatches(&self) -> usize
Number of deinterleave CUDA dispatches.
Sourcepub fn forward_rct_dispatches(&self) -> usize
pub fn forward_rct_dispatches(&self) -> usize
Number of forward RCT CUDA dispatches.
Sourcepub fn forward_ict_dispatches(&self) -> usize
pub fn forward_ict_dispatches(&self) -> usize
Number of forward ICT CUDA dispatches.
Sourcepub fn forward_dwt53_dispatches(&self) -> usize
pub fn forward_dwt53_dispatches(&self) -> usize
Number of forward 5/3 DWT CUDA dispatches.
Sourcepub fn forward_dwt97_dispatches(&self) -> usize
pub fn forward_dwt97_dispatches(&self) -> usize
Number of forward 9/7 DWT CUDA dispatches.
Sourcepub fn quantize_subband_dispatches(&self) -> usize
pub fn quantize_subband_dispatches(&self) -> usize
Number of sub-band quantization CUDA dispatches.
Sourcepub fn tier1_code_block_dispatches(&self) -> usize
pub fn tier1_code_block_dispatches(&self) -> usize
Number of classic Tier-1 CUDA dispatches.
Sourcepub fn ht_code_block_dispatches(&self) -> usize
pub fn ht_code_block_dispatches(&self) -> usize
Number of HT code-block CUDA dispatches.
Sourcepub fn packetization_dispatches(&self) -> usize
pub fn packetization_dispatches(&self) -> usize
Number of packetization CUDA dispatches.
Trait Implementations§
Source§impl Clone for CudaEncodeStageAccelerator
impl Clone for CudaEncodeStageAccelerator
Source§fn clone(&self) -> CudaEncodeStageAccelerator
fn clone(&self) -> CudaEncodeStageAccelerator
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for CudaEncodeStageAccelerator
impl Debug for CudaEncodeStageAccelerator
Source§impl Default for CudaEncodeStageAccelerator
impl Default for CudaEncodeStageAccelerator
Source§fn default() -> CudaEncodeStageAccelerator
fn default() -> CudaEncodeStageAccelerator
Source§impl J2kEncodeStageAccelerator for CudaEncodeStageAccelerator
impl J2kEncodeStageAccelerator for CudaEncodeStageAccelerator
Source§fn dispatch_report(&self) -> J2kEncodeDispatchReport
fn dispatch_report(&self) -> J2kEncodeDispatchReport
Source§fn encode_deinterleave(
&mut self,
job: J2kDeinterleaveToF32Job<'_>,
) -> Result<Option<Vec<Vec<f32>>>, &'static str>
fn encode_deinterleave( &mut self, job: J2kDeinterleaveToF32Job<'_>, ) -> Result<Option<Vec<Vec<f32>>>, &'static str>
Source§fn encode_forward_rct(
&mut self,
job: J2kForwardRctJob<'_>,
) -> Result<bool, &'static str>
fn encode_forward_rct( &mut self, job: J2kForwardRctJob<'_>, ) -> Result<bool, &'static str>
Source§fn encode_forward_ict(
&mut self,
job: J2kForwardIctJob<'_>,
) -> Result<bool, &'static str>
fn encode_forward_ict( &mut self, job: J2kForwardIctJob<'_>, ) -> Result<bool, &'static str>
Source§fn encode_forward_dwt53(
&mut self,
job: J2kForwardDwt53Job<'_>,
) -> Result<Option<J2kForwardDwt53Output>, &'static str>
fn encode_forward_dwt53( &mut self, job: J2kForwardDwt53Job<'_>, ) -> Result<Option<J2kForwardDwt53Output>, &'static str>
Source§fn encode_forward_dwt97(
&mut self,
job: J2kForwardDwt97Job<'_>,
) -> Result<Option<J2kForwardDwt97Output>, &'static str>
fn encode_forward_dwt97( &mut self, job: J2kForwardDwt97Job<'_>, ) -> Result<Option<J2kForwardDwt97Output>, &'static str>
Source§fn encode_quantize_subband(
&mut self,
job: J2kQuantizeSubbandJob<'_>,
) -> Result<Option<Vec<i32>>, &'static str>
fn encode_quantize_subband( &mut self, job: J2kQuantizeSubbandJob<'_>, ) -> Result<Option<Vec<i32>>, &'static str>
Source§fn encode_tier1_code_block(
&mut self,
_job: J2kTier1CodeBlockEncodeJob<'_>,
) -> Result<Option<EncodedJ2kCodeBlock>, &'static str>
fn encode_tier1_code_block( &mut self, _job: J2kTier1CodeBlockEncodeJob<'_>, ) -> Result<Option<EncodedJ2kCodeBlock>, &'static str>
Source§fn encode_ht_code_block(
&mut self,
job: J2kHtCodeBlockEncodeJob<'_>,
) -> Result<Option<EncodedHtJ2kCodeBlock>, &'static str>
fn encode_ht_code_block( &mut self, job: J2kHtCodeBlockEncodeJob<'_>, ) -> Result<Option<EncodedHtJ2kCodeBlock>, &'static str>
Source§fn encode_ht_code_blocks(
&mut self,
jobs: &[J2kHtCodeBlockEncodeJob<'_>],
) -> Result<Option<Vec<EncodedHtJ2kCodeBlock>>, &'static str>
fn encode_ht_code_blocks( &mut self, jobs: &[J2kHtCodeBlockEncodeJob<'_>], ) -> Result<Option<Vec<EncodedHtJ2kCodeBlock>>, &'static str>
Source§fn encode_htj2k_tile(
&mut self,
job: J2kHtj2kTileEncodeJob<'_>,
) -> Result<Option<Vec<u8>>, &'static str>
fn encode_htj2k_tile( &mut self, job: J2kHtj2kTileEncodeJob<'_>, ) -> Result<Option<Vec<u8>>, &'static str>
Source§fn encode_ht_subband(
&mut self,
job: J2kHtSubbandEncodeJob<'_>,
) -> Result<Option<Vec<EncodedHtJ2kCodeBlock>>, &'static str>
fn encode_ht_subband( &mut self, job: J2kHtSubbandEncodeJob<'_>, ) -> Result<Option<Vec<EncodedHtJ2kCodeBlock>>, &'static str>
Source§fn encode_packetization(
&mut self,
job: J2kPacketizationEncodeJob<'_>,
) -> Result<Option<Vec<u8>>, &'static str>
fn encode_packetization( &mut self, job: J2kPacketizationEncodeJob<'_>, ) -> Result<Option<Vec<u8>>, &'static str>
Source§fn encode_tier1_code_blocks(
&mut self,
_jobs: &[J2kTier1CodeBlockEncodeJob<'_>],
) -> Result<Option<Vec<EncodedJ2kCodeBlock>>, &'static str>
fn encode_tier1_code_blocks( &mut self, _jobs: &[J2kTier1CodeBlockEncodeJob<'_>], ) -> Result<Option<Vec<EncodedJ2kCodeBlock>>, &'static str>
Source§fn prefer_parallel_cpu_code_block_fallback(&self) -> bool
fn prefer_parallel_cpu_code_block_fallback(&self) -> bool
Source§fn prefer_parallel_cpu_tile_encode(&self) -> bool
fn prefer_parallel_cpu_tile_encode(&self) -> bool
Auto Trait Implementations§
impl Freeze for CudaEncodeStageAccelerator
impl RefUnwindSafe for CudaEncodeStageAccelerator
impl Send for CudaEncodeStageAccelerator
impl Sync for CudaEncodeStageAccelerator
impl Unpin for CudaEncodeStageAccelerator
impl UnsafeUnpin for CudaEncodeStageAccelerator
impl UnwindSafe for CudaEncodeStageAccelerator
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more