#![allow(dead_code)]
mod convert;
use crate::encode::encoder_types::DownsamplingMethod;
use crate::encode::layout::LayoutParams;
use crate::error::Result;
use crate::foundation::alloc::{
EncodeStats, try_alloc_filled, try_alloc_zeroed_f32_tracked, try_with_capacity_tracked,
};
use crate::foundation::consts::DCT_BLOCK_SIZE;
use crate::foundation::simd_types::{QuantTableSimd, ZeroBiasSimd};
use crate::quant::aq::streaming::StreamingAQ;
use crate::quant::{QuantTable, ZeroBiasParams};
use crate::types::{PixelFormat, Subsampling};
#[cfg(feature = "trellis")]
use crate::encode::trellis::HybridQuantContext;
#[cfg(feature = "trellis")]
use crate::encode::trellis::TrellisConfig;
#[cfg(feature = "trellis")]
use crate::foundation::consts::JPEG_ZIGZAG_ORDER;
/// Quantization tables and zero-bias parameters for the three JPEG channels
/// (Y, Cb, Cr), held in both scalar and SIMD-friendly layouts.
///
/// The `*_simd` fields are derived from the scalar fields in
/// [`QuantContext::new`]; hot quantization loops use the SIMD forms while the
/// scalar tables remain available (e.g. for header emission or trellis paths).
#[derive(Debug, Clone)]
pub struct QuantContext {
// SIMD-layout quantization tables, one per channel.
pub y_quant_simd: QuantTableSimd,
pub cb_quant_simd: QuantTableSimd,
pub cr_quant_simd: QuantTableSimd,
// SIMD-layout zero-bias (deadzone) parameters, one per channel.
pub y_zero_bias_simd: ZeroBiasSimd,
pub cb_zero_bias_simd: ZeroBiasSimd,
pub cr_zero_bias_simd: ZeroBiasSimd,
// Scalar originals the SIMD forms were built from.
pub y_quant: QuantTable,
pub cb_quant: QuantTable,
pub cr_quant: QuantTable,
pub y_zero_bias: ZeroBiasParams,
pub cb_zero_bias: ZeroBiasParams,
pub cr_zero_bias: ZeroBiasParams,
}
impl QuantContext {
    /// Test-only context: flat quantization tables of 16 with baseline
    /// (8-bit) precision and neutral YCbCr zero-bias parameters.
    #[cfg(test)]
    fn default_for_tests() -> Self {
        let table = QuantTable {
            values: [16; 64],
            precision: 0,
        };
        let bias = ZeroBiasParams::for_ycbcr(1.0, 0);
        Self::new(
            table.clone(),
            table.clone(),
            table,
            bias.clone(),
            bias.clone(),
            bias,
        )
    }

    /// Builds a context from per-channel scalar tables and zero-bias
    /// parameters, deriving the SIMD layouts for each up front.
    pub fn new(
        y_quant: QuantTable,
        cb_quant: QuantTable,
        cr_quant: QuantTable,
        y_zero_bias: ZeroBiasParams,
        cb_zero_bias: ZeroBiasParams,
        cr_zero_bias: ZeroBiasParams,
    ) -> Self {
        // Derive SIMD forms before the scalar values are moved into `Self`.
        let y_quant_simd = QuantTableSimd::from_values(&y_quant.values);
        let cb_quant_simd = QuantTableSimd::from_values(&cb_quant.values);
        let cr_quant_simd = QuantTableSimd::from_values(&cr_quant.values);
        let y_zero_bias_simd = ZeroBiasSimd::from_params(&y_zero_bias);
        let cb_zero_bias_simd = ZeroBiasSimd::from_params(&cb_zero_bias);
        let cr_zero_bias_simd = ZeroBiasSimd::from_params(&cr_zero_bias);
        Self {
            y_quant_simd,
            cb_quant_simd,
            cr_quant_simd,
            y_zero_bias_simd,
            cb_zero_bias_simd,
            cr_zero_bias_simd,
            y_quant,
            cb_quant,
            cr_quant,
            y_zero_bias,
            cb_zero_bias,
            cr_zero_bias,
        }
    }
}
use crate::foundation::simd_types::Block8x8f;
use wide::f32x8;
/// Copies one 8x8 block out of a planar strip and level-shifts it by 128 so
/// the DCT input is centered around zero.
///
/// `bx`/`local_by` are the block coordinates within the strip; `strip_width`
/// is the row stride of `strip` in samples. Bounds are only checked in debug
/// builds via `debug_assert!`.
#[inline]
pub(crate) fn extract_block_from_strip_wide(
    strip: &[f32],
    bx: usize,
    local_by: usize,
    strip_width: usize,
) -> Block8x8f {
    // Top-left sample of the block inside the strip.
    let x_start = bx * 8;
    let y_start = local_by * 8;
    // One past the last sample the block will read; must fit in `strip`.
    let last_row_end = (y_start + 7) * strip_width + x_start + 8;
    debug_assert!(
        last_row_end <= strip.len(),
        "extract_block_from_strip_wide: block ({bx}, {local_by}) out of bounds \
        (need {last_row_end}, have {}; strip_width={strip_width})",
        strip.len(),
    );
    let level_shift = f32x8::splat(128.0);
    let mut rows = [f32x8::ZERO; 8];
    for (dy, row) in rows.iter_mut().enumerate() {
        let offset = (y_start + dy) * strip_width + x_start;
        let mut lane = [0.0f32; 8];
        lane.copy_from_slice(&strip[offset..offset + 8]);
        *row = f32x8::from(lane) - level_shift;
    }
    Block8x8f { rows }
}
// Forward 8x8 DCT dispatch for x86_64: uses the archmage-accelerated path
// when a SIMD capability token was summoned, otherwise the portable
// `wide`-based implementation.
#[cfg(target_arch = "x86_64")]
#[inline]
fn forward_dct_dispatch(
token: Option<crate::encode::mage_simd::Desktop64>,
block: &Block8x8f,
) -> Block8x8f {
if let Some(t) = token {
return crate::encode::mage_simd::mage_forward_dct_8x8_wide(t, block);
}
crate::encode::dct::simd::forward_dct_8x8_wide(block)
}
// Non-x86_64 fallback: the token is a unit placeholder so call sites stay
// identical across architectures; always uses the portable DCT.
#[cfg(not(target_arch = "x86_64"))]
#[inline]
fn forward_dct_dispatch(_token: (), block: &Block8x8f) -> Block8x8f {
crate::encode::dct::simd::forward_dct_8x8_wide(block)
}
/// Double-buffered per-channel DCT block storage.
///
/// One buffer accumulates the current iMCU row's blocks while the previous
/// row's blocks are quantized (once their AQ strengths are available);
/// `current` selects which of the two buffers is being filled.
#[derive(Debug)]
struct PendingBuffers {
y: [Vec<Block8x8f>; 2],
cb: [Vec<Block8x8f>; 2],
cr: [Vec<Block8x8f>; 2],
// `false` -> index 0 is current; `true` -> index 1 is current.
current: bool,
}
impl PendingBuffers {
    /// Index of the buffer currently being filled.
    #[inline]
    fn current_idx(&self) -> usize {
        usize::from(self.current)
    }

    /// Index of the previously filled buffer.
    #[inline]
    fn prev_idx(&self) -> usize {
        usize::from(!self.current)
    }

    /// Flips which buffer is "current".
    #[inline]
    fn swap(&mut self) {
        self.current = !self.current;
    }

    /// Empties the previous buffer for all three channels (capacity is kept).
    fn clear_prev(&mut self) {
        let prev = self.prev_idx();
        for buf in [&mut self.y[prev], &mut self.cb[prev], &mut self.cr[prev]] {
            buf.clear();
        }
    }

    #[inline]
    fn current_y(&self) -> &Vec<Block8x8f> {
        &self.y[self.current_idx()]
    }

    #[inline]
    fn current_y_mut(&mut self) -> &mut Vec<Block8x8f> {
        let cur = self.current_idx();
        &mut self.y[cur]
    }

    #[inline]
    fn current_cb_mut(&mut self) -> &mut Vec<Block8x8f> {
        let cur = self.current_idx();
        &mut self.cb[cur]
    }

    #[inline]
    fn current_cr_mut(&mut self) -> &mut Vec<Block8x8f> {
        let cur = self.current_idx();
        &mut self.cr[cur]
    }

    #[inline]
    fn prev_y(&self) -> &Vec<Block8x8f> {
        &self.y[self.prev_idx()]
    }

    #[inline]
    fn prev_cb(&self) -> &Vec<Block8x8f> {
        &self.cb[self.prev_idx()]
    }

    #[inline]
    fn prev_cr(&self) -> &Vec<Block8x8f> {
        &self.cr[self.prev_idx()]
    }
}
// Quantizes a batch of pending chroma DCT blocks into zigzag-ordered i16
// coefficient blocks, looking up each block's AQ strength from the luma
// AQ map (chroma block coordinates are scaled up to luma block coordinates).
//
// `chroma_blocks_h` is the number of chroma blocks per row (width in blocks)
// and `chroma_blocks_v` the number of block rows; `output.len()` on entry
// encodes how many chroma rows were already emitted, which anchors the
// global row index for AQ lookup.
#[allow(clippy::too_many_arguments)]
#[allow(unused_variables, unused_mut)]
fn quantize_chroma_blocks(
pending: &[Block8x8f],
output: &mut Vec<[i16; DCT_BLOCK_SIZE]>,
mut dc_raw_output: Option<&mut Vec<i32>>,
all_aq_strengths: &[f32],
quant_simd: &QuantTableSimd,
zero_bias_simd: &ZeroBiasSimd,
quant_values: &[u16; DCT_BLOCK_SIZE],
#[cfg(feature = "trellis")] hybrid_ctx: Option<&HybridQuantContext>,
use_trellis: bool,
chroma_blocks_h: usize,
chroma_blocks_v: usize,
y_blocks_w: usize,
y_blocks_h: usize,
) {
// Guard against division by zero for degenerate layouts.
let blocks_h = chroma_blocks_h.max(1);
// Global chroma block row of the first block in `pending`.
let global_chroma_by = output.len() / blocks_h;
for (i, dct) in pending.iter().enumerate() {
// When DC trellis is active, record the raw (x64-scaled) DC value so it
// can be re-optimized later; rows[0] lane 0 is the DC coefficient.
#[cfg(feature = "trellis")]
if let Some(dc_raw) = dc_raw_output.as_deref_mut() {
let row0: [f32; 8] = dct.rows[0].into();
let dc_val = (row0[0] * 64.0).round() as i32;
dc_raw.push(dc_val);
}
// Chroma block coordinates within this batch.
let bx = i % blocks_h;
let local_by = i / blocks_h;
// Map chroma block coords to luma block coords (handles subsampling).
let y_bx = (bx * y_blocks_w) / blocks_h;
let chroma_by = global_chroma_by + local_by;
let y_by = (chroma_by * y_blocks_h) / chroma_blocks_v.max(1);
let global_aq_idx = y_by * y_blocks_w + y_bx.min(y_blocks_w.saturating_sub(1));
// Fall back to a neutral strength when the index lands past the AQ map
// (e.g. padded blocks); 0.08 matches the default used elsewhere.
let aq_strength = if global_aq_idx < all_aq_strengths.len() {
all_aq_strengths[global_aq_idx]
} else {
0.08 };
// Trellis path: quantize in natural order, then convert to zigzag.
#[cfg(feature = "trellis")]
let zigzag = if use_trellis {
let dct_arr = dct.to_array();
let natural = hybrid_ctx.unwrap().quantize_block(
&dct_arr,
quant_values,
aq_strength,
1.0,
false, );
let mut result = [0i16; DCT_BLOCK_SIZE];
for j in 0..DCT_BLOCK_SIZE {
result[JPEG_ZIGZAG_ORDER[j] as usize] = natural[j];
}
result
} else {
quant_simd.quantize_with_zero_bias_zigzag(dct, zero_bias_simd, aq_strength)
};
#[cfg(not(feature = "trellis"))]
let zigzag = quant_simd.quantize_with_zero_bias_zigzag(dct, zero_bias_simd, aq_strength);
output.push(zigzag);
}
}
/// Streaming strip-based JPEG encoder front end: color conversion, chroma
/// downsampling, forward DCT, adaptive quantization (AQ), and quantization
/// into zigzag coefficient blocks, one strip of rows at a time.
#[derive(Debug)]
pub struct StripProcessor {
pub(super) layout: LayoutParams,
pub(super) pixel_format: PixelFormat,
pub(super) chroma_downsampling: DownsamplingMethod,
// Full-resolution planar strips (f32 samples). In XYB mode the channel
// assignment differs from plain YCbCr (see `aq_input_strip`).
pub(super) y_strip: Vec<f32>,
pub(super) cb_strip: Vec<f32>,
pub(super) cr_strip: Vec<f32>,
// Downsampled chroma strips.
pub(super) cb_down: Vec<f32>,
pub(super) cr_down: Vec<f32>,
// Quantized zigzag-ordered output blocks per channel.
y_blocks: Vec<[i16; DCT_BLOCK_SIZE]>,
cb_blocks: Vec<[i16; DCT_BLOCK_SIZE]>,
cr_blocks: Vec<[i16; DCT_BLOCK_SIZE]>,
// Raw (x64-scaled) DC values, populated only when DC trellis is active.
y_dc_raw: Vec<i32>,
cb_dc_raw: Vec<i32>,
cr_dc_raw: Vec<i32>,
// Double-buffered DCT blocks awaiting AQ strengths before quantization.
pending: PendingBuffers,
quant: QuantContext,
// Per-luma-block AQ strengths accumulated for all emitted blocks.
all_aq_strengths: Vec<f32>,
aq_state: StreamingAQ,
stats: crate::foundation::alloc::EncodeStats,
deringing: bool,
#[cfg(feature = "trellis")]
hybrid_ctx: Option<HybridQuantContext>,
// SIMD capability token (None when unavailable at runtime).
#[cfg(target_arch = "x86_64")]
simd_token: Option<crate::encode::mage_simd::Desktop64>,
// Scratch planes for the yuv-crate fast 4:2:0 path.
#[cfg(feature = "yuv")]
yuv_temp_y: Vec<u8>,
#[cfg(feature = "yuv")]
yuv_temp_cb: Vec<u8>,
#[cfg(feature = "yuv")]
yuv_temp_cr: Vec<u8>,
// Reusable scratch for one strip's AQ strengths.
aq_strengths_buffer: Vec<f32>,
}
impl StripProcessor {
// Test-only constructor: non-XYB, box chroma downsampling, no restart
// interval, AQ enabled, with flat default quantization tables.
#[cfg(test)]
pub fn new(
width: usize,
height: usize,
subsampling: Subsampling,
pixel_format: PixelFormat,
) -> Result<Self> {
let quant = QuantContext::default_for_tests();
Self::with_xyb(
width,
height,
subsampling,
pixel_format,
DownsamplingMethod::Box,
0,
false,
quant,
true, )
}
pub fn with_xyb(
width: usize,
height: usize,
subsampling: Subsampling,
pixel_format: PixelFormat,
chroma_downsampling: DownsamplingMethod,
_restart_interval: u16,
use_xyb: bool,
quant: QuantContext,
aq_enabled: bool,
) -> Result<Self> {
Self::with_xyb_inner(
width,
height,
subsampling,
pixel_format,
chroma_downsampling,
_restart_interval,
use_xyb,
quant,
aq_enabled,
false,
)
}
pub fn with_xyb_streaming(
width: usize,
height: usize,
subsampling: Subsampling,
pixel_format: PixelFormat,
chroma_downsampling: DownsamplingMethod,
_restart_interval: u16,
use_xyb: bool,
quant: QuantContext,
aq_enabled: bool,
) -> Result<Self> {
Self::with_xyb_inner(
width,
height,
subsampling,
pixel_format,
chroma_downsampling,
_restart_interval,
use_xyb,
quant,
aq_enabled,
true,
)
}
// Shared constructor: computes the layout, then allocates every strip and
// block buffer with tracked (fallible) allocation so OOM surfaces as an
// error instead of an abort. `streaming_through` selects whether output
// block buffers are sized per pending iMCU row or for the whole image.
fn with_xyb_inner(
width: usize,
height: usize,
subsampling: Subsampling,
pixel_format: PixelFormat,
chroma_downsampling: DownsamplingMethod,
_restart_interval: u16,
use_xyb: bool,
quant: QuantContext,
aq_enabled: bool,
streaming_through: bool,
) -> Result<Self> {
let layout = LayoutParams::new(width, height, subsampling, use_xyb);
let strip_height = layout.strip_height;
let padded_width = layout.padded_width;
let padded_c_width = layout.padded_c_width;
let padded_b_width = layout.padded_b_width;
let c_strip_height = layout.c_strip_height;
let b_strip_height = layout.b_strip_height;
let total_y_blocks = layout.total_y_blocks;
let total_c_blocks = layout.total_c_blocks;
let pending_y_capacity = layout.pending_y_capacity;
let pending_c_capacity = layout.pending_c_capacity;
// Grayscale images skip every chroma allocation below.
let is_color = !pixel_format.is_grayscale();
let mut stats = EncodeStats::new();
// AQ is seeded from the first AC luma quant value (index 1).
let y_quant_01 = quant.y_quant.values[1]; let aq_state = StreamingAQ::new(&layout, y_quant_01, aq_enabled)?;
Ok(Self {
layout,
pixel_format,
chroma_downsampling,
y_strip: try_alloc_zeroed_f32_tracked(
padded_width * strip_height,
"y_strip",
&mut stats,
)?,
cb_strip: if is_color {
try_alloc_zeroed_f32_tracked(padded_width * strip_height, "cb_strip", &mut stats)?
} else {
Vec::new()
},
cr_strip: if is_color {
try_alloc_zeroed_f32_tracked(padded_width * strip_height, "cr_strip", &mut stats)?
} else {
Vec::new()
},
cb_down: if is_color {
try_alloc_zeroed_f32_tracked(
padded_c_width * c_strip_height,
"cb_down",
&mut stats,
)?
} else {
Vec::new()
},
// In XYB mode the third channel (B) can use a different downsampled
// geometry than Cb, so cr_down is sized from the B-plane layout.
cr_down: if is_color {
let cr_down_size = if use_xyb {
padded_b_width * b_strip_height
} else {
padded_c_width * c_strip_height
};
try_alloc_zeroed_f32_tracked(cr_down_size, "cr_down", &mut stats)?
} else {
Vec::new()
},
y_blocks: {
let cap = if streaming_through {
pending_y_capacity
} else {
total_y_blocks
};
try_with_capacity_tracked(cap, "y_blocks", &mut stats)?
},
cb_blocks: if is_color {
let cap = if streaming_through {
pending_c_capacity
} else {
total_c_blocks
};
try_with_capacity_tracked(cap, "cb_blocks", &mut stats)?
} else {
Vec::new()
},
cr_blocks: if is_color {
let cap = if streaming_through {
pending_c_capacity
} else {
total_c_blocks
};
try_with_capacity_tracked(cap, "cr_blocks", &mut stats)?
} else {
Vec::new()
},
// Only populated when DC trellis is enabled later via set_trellis/set_hybrid.
y_dc_raw: Vec::new(),
cb_dc_raw: Vec::new(),
cr_dc_raw: Vec::new(),
// Double buffers sized for one pending iMCU row each.
pending: PendingBuffers {
y: [
try_with_capacity_tracked(pending_y_capacity, "pending_y[0]", &mut stats)?,
try_with_capacity_tracked(pending_y_capacity, "pending_y[1]", &mut stats)?,
],
cb: if is_color {
[
try_with_capacity_tracked(pending_c_capacity, "pending_cb[0]", &mut stats)?,
try_with_capacity_tracked(pending_c_capacity, "pending_cb[1]", &mut stats)?,
]
} else {
[Vec::new(), Vec::new()]
},
cr: if is_color {
[
try_with_capacity_tracked(pending_c_capacity, "pending_cr[0]", &mut stats)?,
try_with_capacity_tracked(pending_c_capacity, "pending_cr[1]", &mut stats)?,
]
} else {
[Vec::new(), Vec::new()]
},
current: false,
},
quant,
all_aq_strengths: {
let cap = if streaming_through {
pending_y_capacity
} else {
total_y_blocks
};
try_with_capacity_tracked(cap, "all_aq_strengths", &mut stats)?
},
aq_state,
stats,
deringing: true,
#[cfg(feature = "trellis")]
hybrid_ctx: None,
// Detect SIMD capabilities once at construction time.
#[cfg(target_arch = "x86_64")]
simd_token: {
use archmage::SimdToken;
crate::encode::mage_simd::Desktop64::summon()
},
#[cfg(feature = "yuv")]
yuv_temp_y: if is_color {
vec![0u8; padded_width * strip_height]
} else {
Vec::new()
},
#[cfg(feature = "yuv")]
yuv_temp_cb: if is_color {
vec![0u8; padded_width * strip_height]
} else {
Vec::new()
},
#[cfg(feature = "yuv")]
yuv_temp_cr: if is_color {
vec![0u8; padded_width * strip_height]
} else {
Vec::new()
},
aq_strengths_buffer: vec![0.0f32; pending_y_capacity],
})
}
// Allocation statistics accumulated by the tracked allocators.
#[must_use]
pub fn encode_stats(&self) -> &EncodeStats {
&self.stats
}
// Quantized zigzag-ordered luma blocks produced so far.
pub fn y_blocks(&self) -> &[[i16; DCT_BLOCK_SIZE]] {
&self.y_blocks
}
pub fn cb_blocks(&self) -> &[[i16; DCT_BLOCK_SIZE]] {
&self.cb_blocks
}
pub fn cr_blocks(&self) -> &[[i16; DCT_BLOCK_SIZE]] {
&self.cr_blocks
}
// Clears all quantized output, AQ strengths, and raw DC values (capacity
// is retained for the next batch).
pub fn clear_blocks(&mut self) {
self.y_blocks.clear();
self.cb_blocks.clear();
self.cr_blocks.clear();
self.all_aq_strengths.clear();
self.y_dc_raw.clear();
self.cb_dc_raw.clear();
self.cr_dc_raw.clear();
}
// Moves all accumulated output (blocks, AQ strengths, raw DC values) out,
// leaving the processor's buffers empty for the next batch.
// NOTE(review): `stats` is reset to a fresh EncodeStats here rather than
// taken from `self.stats` — presumably so allocation stats keep
// accumulating across batches and are reported once by `finalize`; confirm.
#[must_use]
pub fn take_blocks(&mut self) -> StripProcessorOutput {
StripProcessorOutput {
y_blocks: core::mem::take(&mut self.y_blocks),
cb_blocks: core::mem::take(&mut self.cb_blocks),
cr_blocks: core::mem::take(&mut self.cr_blocks),
aq_strengths: core::mem::take(&mut self.all_aq_strengths),
stats: EncodeStats::new(), y_dc_raw: core::mem::take(&mut self.y_dc_raw),
cb_dc_raw: core::mem::take(&mut self.cb_dc_raw),
cr_dc_raw: core::mem::take(&mut self.cr_dc_raw),
}
}
// The SIMD capability token detected at construction (None when the CPU
// lacks the required features).
#[cfg(target_arch = "x86_64")]
#[inline]
#[must_use]
pub fn simd_token(&self) -> Option<crate::encode::mage_simd::Desktop64> {
self.simd_token
}
// Toggles the deringing pre-filter applied to luma blocks before the DCT.
pub fn set_deringing(&mut self, enable: bool) {
self.deringing = enable;
}
#[cfg(feature = "trellis")]
pub fn set_trellis(&mut self, config: TrellisConfig) {
if config.is_enabled() {
self.hybrid_ctx = Some(HybridQuantContext::from_trellis_config(config));
} else {
self.hybrid_ctx = None;
}
}
#[cfg(feature = "trellis")]
pub fn set_hybrid(&mut self, config: crate::encode::trellis::HybridConfig) {
self.hybrid_ctx = Some(HybridQuantContext::new(config));
}
// Whether the processor operates in XYB color space.
#[must_use]
pub fn is_xyb(&self) -> bool {
self.layout.use_xyb
}
// Height in rows of one full strip (iMCU row).
pub fn strip_height(&self) -> usize {
self.layout.strip_height
}
pub fn subsampling(&self) -> Subsampling {
self.layout.subsampling
}
// Plane the AQ analysis reads: in XYB mode the perceptual-luma channel is
// stored in `cb_strip`, otherwise it is `y_strip`.
fn aq_input_strip(&self) -> &[f32] {
if self.layout.use_xyb {
&self.cb_strip
} else {
&self.y_strip
}
}
// Converts one interleaved RGB strip into the planar strips, choosing among
// the XYB, gamma-aware fused, fast yuv-crate 4:2:0, and generic YCbCr paths.
//
// Returns `true` when the chosen path already produced downsampled chroma
// (so the caller must skip the separate downsampling pass).
fn color_convert_strip(
&mut self,
rgb_strip: &[u8],
strip_y: usize,
actual_strip_height: usize,
) -> Result<bool> {
if self.layout.use_xyb {
self.convert_strip_to_xyb(rgb_strip, actual_strip_height)?;
return Ok(true); }
// Gamma-aware downsampling is fused into conversion and only applies to
// subsampled color output.
let uses_gamma_aware_fused = self.chroma_downsampling.uses_gamma_aware()
&& !self.pixel_format.is_grayscale()
&& self.layout.subsampling != Subsampling::S444;
if uses_gamma_aware_fused {
self.convert_strip_gamma_aware(rgb_strip, strip_y, actual_strip_height)?;
return Ok(true);
}
// Fast path via the yuv crate for 4:2:0 color; may decline (returns
// false), in which case we fall through to the generic conversion.
#[cfg(feature = "yuv")]
{
if self.layout.subsampling == Subsampling::S420
&& !self.pixel_format.is_grayscale()
&& self.convert_strip_to_ycbcr_420(rgb_strip, actual_strip_height)?
{
return Ok(true);
}
}
self.convert_strip_to_ycbcr(rgb_strip, actual_strip_height)?;
Ok(false)
}
pub fn process_strip(&mut self, rgb_strip: &[u8], strip_y: usize) -> Result<usize> {
let actual_strip_height = self.layout.strip_height.min(self.layout.height - strip_y);
let chroma_already_downsampled =
self.color_convert_strip(rgb_strip, strip_y, actual_strip_height)?;
if actual_strip_height < self.layout.strip_height {
self.pad_strips_vertically(actual_strip_height, self.layout.strip_height);
}
let need_chroma_downsample =
!self.pixel_format.is_grayscale() && !chroma_already_downsampled;
self.process_strip_common(strip_y, actual_strip_height, need_chroma_downsample)
}
pub fn process_strip_ycbcr_f32(
&mut self,
y_row: &[f32],
cb_row: &[f32],
cr_row: &[f32],
strip_y: usize,
) -> Result<usize> {
if self.layout.use_xyb {
return Err(crate::error::Error::unsupported_feature(
"YCbCr input not supported for XYB mode",
));
}
let actual_strip_height = self.layout.strip_height.min(self.layout.height - strip_y);
self.copy_ycbcr_to_strips(y_row, cb_row, cr_row, actual_strip_height)?;
if actual_strip_height < self.layout.strip_height {
self.pad_strips_vertically(actual_strip_height, self.layout.strip_height);
}
let need_chroma_downsample = !self.pixel_format.is_grayscale();
self.process_strip_common(strip_y, actual_strip_height, need_chroma_downsample)
}
pub fn process_strip_ycbcr_f32_subsampled(
&mut self,
y_row: &[f32],
cb_row: &[f32],
cr_row: &[f32],
strip_y: usize,
) -> Result<usize> {
if self.layout.use_xyb {
return Err(crate::error::Error::unsupported_feature(
"YCbCr input not supported for XYB mode",
));
}
let actual_strip_height = self.layout.strip_height.min(self.layout.height - strip_y);
self.copy_ycbcr_subsampled_to_strips(y_row, cb_row, cr_row, actual_strip_height)?;
if actual_strip_height < self.layout.strip_height {
self.pad_strips_vertically(actual_strip_height, self.layout.strip_height);
self.pad_chroma_down_vertically(actual_strip_height)?;
}
self.process_strip_common(strip_y, actual_strip_height, false)
}
fn process_strip_common(
&mut self,
strip_y: usize,
actual_strip_height: usize,
need_chroma_downsample: bool,
) -> Result<usize> {
let aq_input = if self.layout.use_xyb {
&self.cb_strip
} else {
&self.y_strip
};
let aq_count = self.aq_state.process_y_strip_into(
aq_input,
strip_y,
actual_strip_height,
&mut self.aq_strengths_buffer,
);
let downsample_height = if actual_strip_height < self.layout.strip_height {
self.layout.strip_height
} else {
actual_strip_height
};
if need_chroma_downsample {
self.downsample_chroma_strip(downsample_height)?;
}
if let Some(count) = aq_count {
let temp_buffer = std::mem::take(&mut self.aq_strengths_buffer);
self.quantize_prev_pending_imcu(&temp_buffer[..count]);
self.aq_strengths_buffer = temp_buffer;
self.pending.clear_prev();
}
self.dct_strip_blocks_to_pending(strip_y, downsample_height)
}
// Runs the forward DCT over every 8x8 block of the current strip (luma plus
// chroma for color images) and appends the results to the current pending
// buffers. Returns the number of luma blocks added.
fn dct_strip_blocks_to_pending(
&mut self,
strip_y: usize,
strip_height: usize,
) -> Result<usize> {
let blocks_w = self.layout.blocks_w;
// Ceiling division: number of 8-row block rows in this strip.
let strip_blocks_h = (strip_height + 7) / 8;
let start_block_y = strip_y / 8;
let height = self.layout.height;
let pending_idx = self.pending.current_idx();
let padded_width = self.layout.padded_width;
#[cfg(target_arch = "x86_64")]
let simd_token = self.simd_token;
#[cfg(not(target_arch = "x86_64"))]
let simd_token = ();
let y_size = strip_height * padded_width;
// Clamp to the image's total block rows so a padded final strip does not
// emit blocks past the bottom edge.
let max_block_y = (height + 7) / 8;
let actual_strip_blocks_h = strip_blocks_h.min(max_block_y.saturating_sub(start_block_y));
let blocks_added = actual_strip_blocks_h * blocks_w;
// Luma: parallel path delegates extraction + deringing + DCT to rayon.
#[cfg(feature = "parallel")]
{
let deringing = if self.deringing {
Some(self.quant.y_quant.values[0])
} else {
None
};
super::parallel::parallel_dct_y_blocks(
&self.y_strip[..y_size],
blocks_w,
actual_strip_blocks_h,
padded_width,
deringing,
&mut self.pending.y[pending_idx],
);
}
// Luma: sequential path.
#[cfg(not(feature = "parallel"))]
{
let start_idx = self.pending.y[pending_idx].len();
self.pending.y[pending_idx].resize(start_idx + blocks_added, Block8x8f::default());
let output = &mut self.pending.y[pending_idx][start_idx..];
let y_dc_quant = self.quant.y_quant.values[0];
let mut idx = 0;
for local_by in 0..actual_strip_blocks_h {
for bx in 0..blocks_w {
let mut block = extract_block_from_strip_wide(
&self.y_strip[..y_size],
bx,
local_by,
padded_width,
);
// Optional deringing pre-filter, scaled by the luma DC quant value.
if self.deringing {
super::deringing::preprocess_deringing_block(&mut block, y_dc_quant);
}
output[idx] = forward_dct_dispatch(simd_token, &block);
idx += 1;
}
}
}
if !self.pixel_format.is_grayscale() {
if self.layout.use_xyb {
// XYB: the "cb" channel is full-resolution (same geometry as luma)...
let y_size = strip_height * padded_width;
let cb_blocks_total = actual_strip_blocks_h * blocks_w;
let cb_start = self.pending.cb[pending_idx].len();
self.pending.cb[pending_idx]
.resize(cb_start + cb_blocks_total, Block8x8f::default());
let mut cb_idx = 0;
for local_by in 0..strip_blocks_h {
let global_by = start_block_y + local_by;
// Stop at the image's last block row (padded strips overshoot).
if global_by >= (height + 7) / 8 {
break;
}
for bx in 0..blocks_w {
let cb_block = extract_block_from_strip_wide(
&self.cb_strip[..y_size],
bx,
local_by,
padded_width,
);
self.pending.cb[pending_idx][cb_start + cb_idx] =
forward_dct_dispatch(simd_token, &cb_block);
cb_idx += 1;
}
}
// ...while the "cr" channel (B plane) uses its own downsampled geometry.
let b_blocks_w = self.layout.b_blocks_w;
let b_strip_height = self.layout.b_strip_height;
let b_strip_blocks_h = (b_strip_height + 7) / 8;
let b_blocks_total = b_blocks_w * b_strip_blocks_h;
let padded_b_width = self.layout.padded_b_width;
let b_size = b_strip_height * padded_b_width;
let cr_start = self.pending.cr[pending_idx].len();
self.pending.cr[pending_idx]
.resize(cr_start + b_blocks_total, Block8x8f::default());
let mut cr_idx = 0;
for local_by in 0..b_strip_blocks_h {
for bx in 0..b_blocks_w {
let cr_block = extract_block_from_strip_wide(
&self.cr_down[..b_size],
bx,
local_by,
padded_b_width,
);
self.pending.cr[pending_idx][cr_start + cr_idx] =
forward_dct_dispatch(simd_token, &cr_block);
cr_idx += 1;
}
}
} else {
// YCbCr: both chroma planes share the downsampled chroma geometry.
let c_blocks_w = self.layout.c_blocks_w;
let c_strip_height = self.layout.c_strip_height;
let c_strip_blocks_h = (c_strip_height + 7) / 8;
let c_blocks_total = c_blocks_w * c_strip_blocks_h;
let padded_c_width = self.layout.padded_c_width;
let c_size = c_strip_height * padded_c_width;
let cb_start = self.pending.cb[pending_idx].len();
let cr_start = self.pending.cr[pending_idx].len();
self.pending.cb[pending_idx]
.resize(cb_start + c_blocks_total, Block8x8f::default());
self.pending.cr[pending_idx]
.resize(cr_start + c_blocks_total, Block8x8f::default());
let mut idx = 0;
for local_by in 0..c_strip_blocks_h {
for bx in 0..c_blocks_w {
let cb_block = extract_block_from_strip_wide(
&self.cb_down[..c_size],
bx,
local_by,
padded_c_width,
);
self.pending.cb[pending_idx][cb_start + idx] =
forward_dct_dispatch(simd_token, &cb_block);
let cr_block = extract_block_from_strip_wide(
&self.cr_down[..c_size],
bx,
local_by,
padded_c_width,
);
self.pending.cr[pending_idx][cr_start + idx] =
forward_dct_dispatch(simd_token, &cr_block);
idx += 1;
}
}
}
}
// Current buffer is now full for this iMCU row; make it the "previous"
// buffer so the next strip's AQ results can quantize it.
self.pending.swap();
Ok(blocks_added)
}
// Quantizes the previously filled pending iMCU row into the output block
// vectors, using the per-block AQ strengths computed for that row. Luma is
// handled inline; chroma is delegated to `quantize_chroma_blocks`, which
// maps chroma block coordinates back to the luma AQ map.
fn quantize_prev_pending_imcu(&mut self, aq_strengths: &[f32]) {
let buffer_idx = self.pending.prev_idx();
let quant = &self.quant;
#[cfg(feature = "trellis")]
let use_trellis = self.hybrid_ctx.is_some();
#[cfg(not(feature = "trellis"))]
let use_trellis = false;
// Raw DC values are only recorded when DC trellis will run in finalize().
#[cfg(feature = "trellis")]
let store_dc_raw = self
.hybrid_ctx
.as_ref()
.is_some_and(|ctx| ctx.is_dc_trellis_enabled());
#[cfg(not(feature = "trellis"))]
let store_dc_raw = false;
for (i, dct) in self.pending.y[buffer_idx].iter().enumerate() {
// Missing strengths (shouldn't happen in practice) fall back to 0.08.
let aq_strength = aq_strengths.get(i).copied().unwrap_or(0.08);
if store_dc_raw {
// rows[0] lane 0 is the DC coefficient; keep it x64-scaled as i32.
let row0: [f32; 8] = dct.rows[0].into();
let dc_raw = (row0[0] * 64.0).round() as i32;
self.y_dc_raw.push(dc_raw);
}
// Trellis path quantizes in natural order, then converts to zigzag.
#[cfg(feature = "trellis")]
let zigzag = if use_trellis {
let dct_arr = dct.to_array();
let natural = self.hybrid_ctx.as_ref().unwrap().quantize_block(
&dct_arr,
&quant.y_quant.values,
aq_strength,
1.0, true, );
let mut result = [0i16; DCT_BLOCK_SIZE];
for j in 0..DCT_BLOCK_SIZE {
result[JPEG_ZIGZAG_ORDER[j] as usize] = natural[j];
}
result
} else {
quant.y_quant_simd.quantize_with_zero_bias_zigzag(
dct,
&quant.y_zero_bias_simd,
aq_strength,
)
};
#[cfg(not(feature = "trellis"))]
let zigzag = quant.y_quant_simd.quantize_with_zero_bias_zigzag(
dct,
&quant.y_zero_bias_simd,
aq_strength,
);
self.y_blocks.push(zigzag);
// Record the strength so chroma below (and later passes) can reuse it.
self.all_aq_strengths.push(aq_strength);
}
{
let y_blocks_w = self.layout.y_blocks_w;
let y_blocks_h = self.layout.y_blocks_h;
let c_blocks_w = self.layout.c_blocks_w;
let c_blocks_h = self.layout.c_blocks_h;
let cb_dc_raw = if store_dc_raw {
Some(&mut self.cb_dc_raw)
} else {
None
};
quantize_chroma_blocks(
&self.pending.cb[buffer_idx],
&mut self.cb_blocks,
cb_dc_raw,
&self.all_aq_strengths,
&quant.cb_quant_simd,
&quant.cb_zero_bias_simd,
&quant.cb_quant.values,
#[cfg(feature = "trellis")]
self.hybrid_ctx.as_ref(),
use_trellis,
c_blocks_w,
c_blocks_h,
y_blocks_w,
y_blocks_h,
);
// In XYB mode the cr channel (B plane) has its own block geometry.
let cr_blocks_h = if self.layout.use_xyb {
self.layout.b_blocks_w
} else {
c_blocks_w
};
let cr_blocks_v = if self.layout.use_xyb {
self.layout.b_blocks_h
} else {
c_blocks_h
};
let cr_dc_raw = if store_dc_raw {
Some(&mut self.cr_dc_raw)
} else {
None
};
quantize_chroma_blocks(
&self.pending.cr[buffer_idx],
&mut self.cr_blocks,
cr_dc_raw,
&self.all_aq_strengths,
&quant.cr_quant_simd,
&quant.cr_zero_bias_simd,
&quant.cr_quant.values,
#[cfg(feature = "trellis")]
self.hybrid_ctx.as_ref(),
use_trellis,
cr_blocks_h,
cr_blocks_v,
y_blocks_w,
y_blocks_h,
);
}
}
// Flushes all remaining state: quantizes any pending rows (flushed AQ
// strengths first, then any still-current buffer with default strengths),
// optionally runs the DC trellis pass, and returns the complete output.
pub fn finalize(mut self) -> Result<StripProcessorOutput> {
// Drain the AQ state's final strengths for the last completed row.
let flush_count = self.aq_state.flush_into(&mut self.aq_strengths_buffer);
if let Some(count) = flush_count {
if !self.pending.prev_y().is_empty() {
// Same take/restore dance as process_strip_common: borrow the
// strengths slice while calling a `&mut self` method.
let temp_buffer = std::mem::take(&mut self.aq_strengths_buffer);
self.quantize_prev_pending_imcu(&temp_buffer[..count]);
self.aq_strengths_buffer = temp_buffer;
}
}
// Any blocks still in the current buffer never got AQ strengths (e.g. AQ
// disabled or a trailing row); quantize them with the neutral 0.08.
if !self.pending.current_y().is_empty() {
let default_aq = try_alloc_filled(
self.pending.current_y().len(),
0.08f32,
"default_aq_strengths",
)?;
self.pending.swap();
self.quantize_prev_pending_imcu(&default_aq);
}
#[cfg(feature = "trellis")]
if !self.y_dc_raw.is_empty() {
self.apply_dc_trellis();
}
Ok(StripProcessorOutput {
y_blocks: self.y_blocks,
cb_blocks: self.cb_blocks,
cr_blocks: self.cr_blocks,
aq_strengths: self.all_aq_strengths,
stats: self.stats,
y_dc_raw: self.y_dc_raw,
cb_dc_raw: self.cb_dc_raw,
cr_dc_raw: self.cr_dc_raw,
})
}
// Runs the rate-distortion DC trellis over each channel's quantized blocks,
// using the raw DC values recorded during quantization. No-op unless a
// hybrid context with DC trellis enabled is installed.
#[cfg(feature = "trellis")]
fn apply_dc_trellis(&mut self) {
let Some(ref hybrid_ctx) = self.hybrid_ctx else {
return;
};
if !hybrid_ctx.is_dc_trellis_enabled() {
return;
}
let config = hybrid_ctx.trellis_config();
let lambda1 = config.lambda_log_scale1();
let lambda2 = config.lambda_log_scale2();
let delta_dc_weight = config.get_delta_dc_weight();
if !self.y_dc_raw.is_empty() && !self.y_blocks.is_empty() {
let blocks_w = self.layout.y_blocks_w;
// DC quant value is table entry 0.
let dc_quantval = self.quant.y_quant.values[0];
let dc_table = hybrid_ctx.luma_dc_rate_table();
dc_trellis_channel_row_by_row(
&self.y_dc_raw,
&mut self.y_blocks,
blocks_w,
dc_quantval,
dc_table,
lambda1,
lambda2,
delta_dc_weight,
);
}
if !self.cb_dc_raw.is_empty() && !self.cb_blocks.is_empty() {
let blocks_w = self.layout.c_blocks_w;
let dc_quantval = self.quant.cb_quant.values[0];
let dc_table = hybrid_ctx.chroma_dc_rate_table();
dc_trellis_channel_row_by_row(
&self.cb_dc_raw,
&mut self.cb_blocks,
blocks_w,
dc_quantval,
dc_table,
lambda1,
lambda2,
delta_dc_weight,
);
}
// NOTE(review): in XYB mode the cr channel uses b_blocks_w elsewhere, but
// c_blocks_w is used here — confirm whether XYB + DC trellis can co-occur.
if !self.cr_dc_raw.is_empty() && !self.cr_blocks.is_empty() {
let blocks_w = self.layout.c_blocks_w;
let dc_quantval = self.quant.cr_quant.values[0];
let dc_table = hybrid_ctx.chroma_dc_rate_table();
dc_trellis_channel_row_by_row(
&self.cr_dc_raw,
&mut self.cr_blocks,
blocks_w,
dc_quantval,
dc_table,
lambda1,
lambda2,
delta_dc_weight,
);
}
}
}
/// Re-optimizes the quantized DC coefficients of one channel with the
/// rate-distortion DC trellis, one block row at a time.
///
/// `dc_raw` holds the raw (pre-quantization, x64-scaled) DC values in block
/// order; `blocks` holds the zigzag-ordered quantized blocks and is updated
/// in place with the trellis-chosen DC values. `delta_dc_weight > 0` enables
/// a vertical smoothing term that also feeds the row above into the trellis.
///
/// Fix: the slice reference on the `copy_from_slice` call had been corrupted
/// into a `¤` character (HTML-entity mangling of `&current_raw_dc`), which
/// does not compile; the intended `&current_raw_dc[..row_len]` is restored.
#[cfg(feature = "trellis")]
#[allow(clippy::too_many_arguments)]
fn dc_trellis_channel_row_by_row(
    dc_raw: &[i32],
    blocks: &mut [[i16; DCT_BLOCK_SIZE]],
    blocks_w: usize,
    dc_quantval: u16,
    dc_table: &crate::encode::trellis::RateTable,
    lambda1: f32,
    lambda2: f32,
    delta_dc_weight: f32,
) {
    if dc_raw.is_empty() || blocks.is_empty() || blocks_w == 0 {
        return;
    }
    let num_blocks = blocks.len();
    // Ceiling division: number of block rows.
    let blocks_h = (num_blocks + blocks_w - 1) / blocks_w;
    // The trellis works on natural-order blocks with only DC populated.
    let raw_blocks: Vec<[i32; DCT_BLOCK_SIZE]> = dc_raw
        .iter()
        .map(|&dc| {
            let mut block = [0i32; DCT_BLOCK_SIZE];
            block[0] = dc;
            block
        })
        .collect();
    // De-zigzag the quantized blocks so the trellis sees natural order.
    let mut natural_blocks: Vec<[i16; DCT_BLOCK_SIZE]> = blocks
        .iter()
        .map(|zigzag| {
            let mut natural = [0i16; DCT_BLOCK_SIZE];
            for (i, &zz_idx) in JPEG_ZIGZAG_ORDER.iter().enumerate() {
                natural[i] = zigzag[zz_idx as usize];
            }
            natural
        })
        .collect();
    // Context from the row above, used by the vertical delta-DC penalty.
    let mut above_raw_dc: Vec<i32> = vec![0; blocks_w];
    let mut above_quant_dc: Vec<i16> = vec![0; blocks_w];
    let mut current_raw_dc: Vec<i32> = vec![0; blocks_w];
    // DC prediction carries across rows in scan order.
    let mut last_dc: i16 = 0;
    for row in 0..blocks_h {
        let row_start = row * blocks_w;
        let row_end = (row_start + blocks_w).min(num_blocks);
        let row_len = row_end - row_start;
        let indices: Vec<usize> = (row_start..row_end).collect();
        for col in 0..row_len {
            current_raw_dc[col] = raw_blocks[row_start + col][0];
        }
        // Only supply above-row context once a previous row exists and the
        // vertical smoothing term is active.
        let above_data = if delta_dc_weight > 0.0 && row > 0 {
            Some((
                above_raw_dc[..row_len].as_ref(),
                above_quant_dc[..row_len].as_ref(),
            ))
        } else {
            None
        };
        last_dc = crate::encode::trellis::dc_trellis_optimize_indexed(
            &raw_blocks,
            &mut natural_blocks,
            &indices,
            dc_quantval,
            dc_table,
            last_dc,
            lambda1,
            lambda2,
            delta_dc_weight,
            above_data,
        );
        // Roll this row's raw and (now optimized) quantized DC values into
        // the "above" context for the next row.
        above_raw_dc[..row_len].copy_from_slice(&current_raw_dc[..row_len]);
        for col in 0..row_len {
            above_quant_dc[col] = natural_blocks[row_start + col][0];
        }
    }
    // Re-zigzag the optimized blocks back into the caller's layout.
    for (i, natural) in natural_blocks.into_iter().enumerate() {
        for (j, &zz_idx) in JPEG_ZIGZAG_ORDER.iter().enumerate() {
            blocks[i][zz_idx as usize] = natural[j];
        }
    }
}
/// Everything the strip processor produces: quantized zigzag blocks per
/// channel, the per-luma-block AQ strengths, allocation stats, and (when DC
/// trellis is active) the raw DC values per channel.
#[derive(Debug)]
pub struct StripProcessorOutput {
pub y_blocks: Vec<[i16; DCT_BLOCK_SIZE]>,
pub cb_blocks: Vec<[i16; DCT_BLOCK_SIZE]>,
pub cr_blocks: Vec<[i16; DCT_BLOCK_SIZE]>,
pub aq_strengths: Vec<f32>,
pub stats: EncodeStats,
// Empty unless DC trellis recorded raw (x64-scaled) DC values.
pub y_dc_raw: Vec<i32>,
pub cb_dc_raw: Vec<i32>,
pub cr_dc_raw: Vec<i32>,
}
#[cfg(test)]
mod tests {
use super::*;
// 4:2:0 iMCUs are 16x16, so the strip height is 16 rows.
#[test]
fn test_strip_processor_creation() {
let processor = StripProcessor::new(1920, 1080, Subsampling::S420, PixelFormat::Rgb);
assert!(processor.is_ok());
let processor = processor.unwrap();
assert_eq!(processor.strip_height(), 16); }
// 4:4:4 iMCUs are 8x8, so the strip height is 8 rows.
#[test]
fn test_strip_processor_444_strip_height() {
let processor = StripProcessor::new(1920, 1080, Subsampling::S444, PixelFormat::Rgb);
assert!(processor.is_ok());
let processor = processor.unwrap();
assert_eq!(processor.strip_height(), 8); }
// Round-trips a synthetic blocky gradient image through encode + decode at
// every height from 56 to 72, covering partial-MCU bottom strips, and
// checks dimensions, buffer size, and a minimum PSNR.
#[test]
#[cfg(feature = "decoder")]
fn test_strip_partial_mcu_heights() {
use crate::encode::{EncoderConfig, PixelLayout};
use enough::Unstoppable;
let width = 64usize;
let mut results = Vec::new();
for height in 56..=72 {
// Build a test pattern: per-8x8-block base colors plus an intra-block
// gradient, so quantization artifacts are measurable but bounded.
let mut rgb = vec![0u8; width * height * 3];
for y in 0..height {
for x in 0..width {
let idx = (y * width + x) * 3;
let block_x = x / 8;
let block_y = y / 8;
let colors: [(u8, u8, u8); 8] = [
(200, 100, 80), (80, 180, 100), (100, 80, 180), (180, 180, 80), (80, 150, 180), (180, 80, 150), (140, 140, 140), (220, 180, 140), ];
let color_idx = (block_x + block_y * 3) % 8;
let (r, g, b) = colors[color_idx];
let intra_x = (x % 8) as i16;
let intra_y = (y % 8) as i16;
let grad = ((intra_x + intra_y) / 2) as u8;
rgb[idx] = r.saturating_add(grad);
rgb[idx + 1] = g.saturating_sub(grad / 2);
rgb[idx + 2] = b.saturating_add(grad / 2);
}
}
let config = EncoderConfig::ycbcr(85.0, crate::encode::ChromaSubsampling::Quarter)
.progressive(false)
.optimize_huffman(true);
let mut enc = config
.encode_from_bytes(width as u32, height as u32, PixelLayout::Rgb8Srgb)
.expect("encoder creation failed");
enc.push_packed(&rgb, Unstoppable).expect("push failed");
let jpeg_strip = enc.finish().expect("strip encode failed");
let decoded_strip = crate::decode::Decoder::new()
.decode(&jpeg_strip, enough::Unstoppable)
.expect("strip decode failed");
assert_eq!(
decoded_strip.width, width as u32,
"strip width mismatch at height {}",
height
);
assert_eq!(
decoded_strip.height, height as u32,
"strip height mismatch at height {}",
height
);
let expected_size = width * height * 3;
let decoded_pixels = decoded_strip.pixels_u8().unwrap();
assert_eq!(
decoded_pixels.len(),
expected_size,
"strip data size mismatch at height {}: got {} expected {}",
height,
decoded_pixels.len(),
expected_size
);
// Compute MSE / max sample error against the original pattern.
let mut sum_sq_err: u64 = 0;
let mut max_diff: i32 = 0;
for (&orig, &dec) in rgb.iter().zip(decoded_pixels.iter()) {
let diff = (orig as i32 - dec as i32).abs();
sum_sq_err += (diff as u64) * (diff as u64);
if diff > max_diff {
max_diff = diff;
}
}
let mse = sum_sq_err as f64 / expected_size as f64;
let psnr = if mse > 0.0 {
10.0 * (255.0 * 255.0 / mse).log10()
} else {
100.0
};
results.push((height, max_diff, jpeg_strip.len(), psnr));
}
// Diagnostic table printed before asserting, so a failing height is
// visible alongside its neighbors.
println!("\nHeight MaxDiff Size PSNR");
for (height, max_diff, size, psnr) in &results {
let marker = if *psnr < 25.0 { " <-- LOW" } else { "" };
println!(
"{:>6} {:>8} {:>8} {:>8.2}{}",
height, max_diff, size, psnr, marker
);
}
for (height, _, _, psnr) in &results {
assert!(
*psnr > 25.0,
"strip encoder PSNR {} at height {} is too low (expected > 25)",
psnr,
height
);
}
}
}