use alloc::vec;
use alloc::vec::Vec;
use core::mem;
#[allow(unused_imports)]
use whereat::at;
use super::api::EncodeError;
use super::api::PixelLayout;
use super::arithmetic::ArithmeticEncoder;
use super::cost::{
LevelCosts, ProbaStats, analyze_image, assign_segments_kmeans, classify_image_type,
compute_segment_quant, content_type_to_tuning,
};
use super::vec_writer::VecWriter;
use crate::common::prediction::*;
use crate::common::types::Frame;
use crate::common::types::*;
use crate::decoder::yuv::convert_image_sharp_yuv;
use crate::decoder::yuv::convert_image_y;
use crate::decoder::yuv::convert_image_yuv;
mod header;
mod mode_selection;
mod prediction;
mod residuals;
use super::fast_math::quality_to_quant_index;
/// Quality-step threshold below which a multi-pass search is considered converged.
const DQ_LIMIT: f32 = 0.4;

/// State for the secant-style search over quality used by target-size and
/// target-PSNR encoding: tracks the current and previous quality settings
/// together with the metric value each produced.
struct PassStats {
    /// True until the first `compute_next_q` call; the first step is a fixed
    /// +/-10 probe rather than a secant step.
    is_first: bool,
    /// Most recent quality step taken (clamped to [-30, 30]).
    dq: f32,
    /// Quality to use for the next encoding pass.
    q: f32,
    /// Quality used for the previous pass.
    last_q: f32,
    /// Lower bound for `q`.
    qmin: f32,
    /// Upper bound for `q`.
    qmax: f32,
    /// Metric produced by the most recent pass (bytes or dB).
    value: f64,
    /// Metric produced by the pass before that.
    last_value: f64,
    /// Metric value the search is trying to hit.
    target: f64,
}

impl PassStats {
    /// Shared constructor: start at `quality` clamped into `[qmin, qmax]`,
    /// aiming for `target` (interpreted by the caller as bytes or dB).
    fn new(target: f64, quality: u8, qmin: u8, qmax: u8) -> Self {
        let qmin_f = f32::from(qmin);
        let qmax_f = f32::from(qmax);
        let q = f32::from(quality).clamp(qmin_f, qmax_f);
        Self {
            is_first: true,
            dq: 10.0,
            q,
            last_q: q,
            qmin: qmin_f,
            qmax: qmax_f,
            value: 0.0,
            last_value: 0.0,
            target,
        }
    }

    /// Search state for a target compressed size in bytes.
    fn new_for_size(target_size: u32, quality: u8, qmin: u8, qmax: u8) -> Self {
        Self::new(f64::from(target_size), quality, qmin, qmax)
    }

    /// Search state for a target overall PSNR in dB.
    fn new_for_psnr(target_psnr: f32, quality: u8, qmin: u8, qmax: u8) -> Self {
        Self::new(f64::from(target_psnr), quality, qmin, qmax)
    }

    /// Advance the search: pick the next quality from the last two
    /// (quality, value) observations. The caller must store the pass result
    /// in `self.value` before calling. Returns the new quality.
    fn compute_next_q(&mut self) -> f32 {
        let dq = if self.is_first {
            // First step: fixed probe, direction chosen by which side of the
            // target the initial pass landed on.
            self.is_first = false;
            if self.value > self.target {
                -self.dq
            } else {
                self.dq
            }
        } else if (self.value - self.last_value).abs() > f64::EPSILON {
            // Secant step: linear interpolation between the last two passes.
            let slope = (self.target - self.value) / (self.last_value - self.value);
            (slope * f64::from(self.last_q - self.q)) as f32
        } else {
            // Metric did not move between passes; further stepping is useless.
            0.0
        };
        self.dq = dq.clamp(-30.0, 30.0);
        self.last_q = self.q;
        self.last_value = self.value;
        self.q = (self.q + self.dq).clamp(self.qmin, self.qmax);
        self.q
    }

    /// True once the quality step has shrunk below `DQ_LIMIT`.
    fn is_converged(&self) -> bool {
        self.dq.abs() <= DQ_LIMIT
    }
}
/// Sum of squared errors between one 16x16 luma macroblock of the source
/// plane and its prediction block, dispatched to the fastest available
/// backend (x86-64 v3, NEON, wasm SIMD128, or the scalar fallback).
///
/// `src_y` is the luma plane with `src_width` pixels per row; `mbx`/`mby`
/// are macroblock coordinates in 16-pixel units. `pred` carries a one-pixel
/// border (see the `LUMA_STRIDE` indexing in the scalar fallback).
#[inline]
pub(super) fn sse_16x16_luma(
    src_y: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; LUMA_BLOCK_SIZE],
) -> u32 {
    use archmage::prelude::*;
    // NOTE(review): incant! presumably expands to runtime dispatch over the
    // `sse_16x16_luma_dispatch_*` variants listed, in order of preference.
    incant!(
        sse_16x16_luma_dispatch(src_y, src_width, mbx, mby, pred),
        [v3, neon, wasm128, scalar]
    )
}
/// x86-64 (v3 feature level) backend for [`sse_16x16_luma`]; forwards to the
/// shared SSE implementation. The token certifies the required CPU features.
// Fix: the original carried a duplicated `#[cfg(target_arch = "x86_64")]`
// attribute; one copy is sufficient.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn sse_16x16_luma_dispatch_v3(
    _token: archmage::X64V3Token,
    src_y: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; LUMA_BLOCK_SIZE],
) -> u32 {
    crate::common::simd_sse::sse_16x16_luma(src_y, src_width, mbx, mby, pred)
}
/// AArch64 NEON backend for [`sse_16x16_luma`]; the token certifies NEON
/// support and is passed through to the intrinsic implementation.
#[cfg(target_arch = "aarch64")]
#[inline(always)]
fn sse_16x16_luma_dispatch_neon(
    token: archmage::NeonToken,
    src_y: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; LUMA_BLOCK_SIZE],
) -> u32 {
    crate::common::simd_neon::sse_16x16_luma_neon(token, src_y, src_width, mbx, mby, pred)
}
/// wasm SIMD128 backend for [`sse_16x16_luma`].
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn sse_16x16_luma_dispatch_wasm128(
    token: archmage::Wasm128Token,
    src_y: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; LUMA_BLOCK_SIZE],
) -> u32 {
    crate::common::simd_wasm::sse_16x16_luma_wasm_entry(token, src_y, src_width, mbx, mby, pred)
}
/// Portable scalar backend for [`sse_16x16_luma`]: accumulates the squared
/// pixel differences between the source macroblock and its prediction.
#[inline(always)]
fn sse_16x16_luma_dispatch_scalar(
    _token: archmage::ScalarToken,
    src_y: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; LUMA_BLOCK_SIZE],
) -> u32 {
    // Top-left pixel of this macroblock within the source plane.
    let origin = mby * 16 * src_width + mbx * 16;
    let mut total = 0u32;
    for row in 0..16 {
        let src = &src_y[origin + row * src_width..][..16];
        // The prediction block carries a one-pixel border, hence the +1 offsets.
        let prd = &pred[(row + 1) * LUMA_STRIDE + 1..][..16];
        total += src
            .iter()
            .zip(prd)
            .map(|(&s, &p)| {
                let d = i32::from(s) - i32::from(p);
                (d * d) as u32
            })
            .sum::<u32>();
    }
    total
}
/// Sum of squared errors between one 8x8 chroma block of the source plane
/// and its prediction block, dispatched to the fastest available backend
/// (x86-64 v3, NEON, wasm SIMD128, or the scalar fallback).
///
/// `src_uv` is one chroma plane with `src_width` pixels per row; `mbx`/`mby`
/// are macroblock coordinates in 8-pixel units. `pred` carries a one-pixel
/// border (see the `CHROMA_STRIDE` indexing in the scalar fallback).
#[inline]
pub(super) fn sse_8x8_chroma(
    src_uv: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; CHROMA_BLOCK_SIZE],
) -> u32 {
    use archmage::prelude::*;
    // NOTE(review): incant! presumably expands to runtime dispatch over the
    // `sse_8x8_chroma_dispatch_*` variants listed, in order of preference.
    incant!(
        sse_8x8_chroma_dispatch(src_uv, src_width, mbx, mby, pred),
        [v3, neon, wasm128, scalar]
    )
}
/// x86-64 (v3 feature level) backend for [`sse_8x8_chroma`]; forwards to the
/// shared SSE implementation.
// Fix: the original carried a duplicated `#[cfg(target_arch = "x86_64")]`
// attribute; one copy is sufficient.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn sse_8x8_chroma_dispatch_v3(
    _token: archmage::X64V3Token,
    src_uv: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; CHROMA_BLOCK_SIZE],
) -> u32 {
    crate::common::simd_sse::sse_8x8_chroma(src_uv, src_width, mbx, mby, pred)
}
/// AArch64 NEON backend for [`sse_8x8_chroma`].
#[cfg(target_arch = "aarch64")]
#[inline(always)]
fn sse_8x8_chroma_dispatch_neon(
    token: archmage::NeonToken,
    src_uv: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; CHROMA_BLOCK_SIZE],
) -> u32 {
    crate::common::simd_neon::sse_8x8_chroma_neon(token, src_uv, src_width, mbx, mby, pred)
}
/// wasm SIMD128 backend for [`sse_8x8_chroma`].
#[cfg(target_arch = "wasm32")]
#[inline(always)]
fn sse_8x8_chroma_dispatch_wasm128(
    token: archmage::Wasm128Token,
    src_uv: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; CHROMA_BLOCK_SIZE],
) -> u32 {
    crate::common::simd_wasm::sse_8x8_chroma_wasm_entry(token, src_uv, src_width, mbx, mby, pred)
}
/// Portable scalar backend for [`sse_8x8_chroma`]: accumulates the squared
/// pixel differences between the source block and its prediction.
#[inline(always)]
fn sse_8x8_chroma_dispatch_scalar(
    _token: archmage::ScalarToken,
    src_uv: &[u8],
    src_width: usize,
    mbx: usize,
    mby: usize,
    pred: &[u8; CHROMA_BLOCK_SIZE],
) -> u32 {
    // Top-left pixel of this 8x8 chroma block within the source plane.
    let origin = mby * 8 * src_width + mbx * 8;
    let mut total = 0u32;
    for row in 0..8 {
        let src = &src_uv[origin + row * src_width..][..8];
        // The prediction block carries a one-pixel border, hence the +1 offsets.
        let prd = &pred[(row + 1) * CHROMA_STRIDE + 1..][..8];
        total += src
            .iter()
            .zip(prd)
            .map(|(&s, &p)| {
                let d = i32::from(s) - i32::from(p);
                (d * d) as u32
            })
            .sum::<u32>();
    }
    total
}
/// Per-macroblock neighbor counters (one entry per 4x4 sub-block of each
/// plane, plus the Y2 block). NOTE(review): presumably used as left/top
/// coding context for residual tokens — confirm against the residuals module.
#[derive(Clone, Copy, Default)]
struct Complexity {
    y2: u8,
    y: [u8; 4],
    u: [u8; 2],
    v: [u8; 2],
}

impl Complexity {
    /// Zero all counters. `include_y2` selects whether the Y2 counter is
    /// also cleared; callers keep it when the macroblock has no Y2 block
    /// (4x4 intra mode).
    fn clear(&mut self, include_y2: bool) {
        // Preserve y2 unless the caller asked to reset it too.
        let y2 = if include_y2 { 0 } else { self.y2 };
        *self = Complexity {
            y2,
            ..Complexity::default()
        };
    }
}
/// Quantizer indices as signalled in the frame header: an absolute luma-AC
/// index plus optional signed deltas for the remaining coefficient classes
/// (`None` means the delta is not signalled, i.e. zero).
#[derive(Default)]
struct QuantizationIndices {
    /// Absolute base index for luma AC coefficients.
    yac_abs: u8,
    /// Luma DC delta relative to `yac_abs`.
    ydc_delta: Option<i8>,
    /// Y2 (second-order luma) DC delta.
    y2dc_delta: Option<i8>,
    /// Y2 AC delta.
    y2ac_delta: Option<i8>,
    /// Chroma DC delta.
    uvdc_delta: Option<i8>,
    /// Chroma AC delta.
    uvac_delta: Option<i8>,
}
/// Mode decisions for a single macroblock, recorded during the encoding pass
/// and replayed later when the macroblock headers are written.
#[derive(Clone, Copy, Default)]
struct MacroblockInfo {
    /// 16x16 luma prediction mode (`B` means per-4x4 intra prediction).
    luma_mode: LumaMode,
    /// The 16 per-sub-block intra modes, present when `luma_mode` is `B`.
    luma_bpred: Option<[IntraMode; 16]>,
    /// 8x8 chroma prediction mode.
    chroma_mode: ChromaMode,
    /// Segment this macroblock belongs to; `None` when segmentation is off.
    segment_id: Option<usize>,
    /// True when every coded coefficient quantized to zero (MB skipped).
    coeffs_skipped: bool,
}
/// Transform coefficients for one chroma plane of a macroblock:
/// four 4x4 blocks of 16 coefficients each, stored contiguously.
pub(super) type ChromaCoeffs = [i32; 16 * 4];
/// Quantized, zigzag-ordered coefficients for a whole macroblock, kept so
/// that tokens can be re-emitted (e.g. on a later pass) without re-running
/// the transforms.
#[derive(Clone)]
struct QuantizedMbCoeffs {
    y2_zigzag: [i32; 16],
    y1_zigzag: [[i32; 16]; 16],
    u_zigzag: [[i32; 16]; 4],
    v_zigzag: [[i32; 16]; 4],
}

impl QuantizedMbCoeffs {
    /// All-zero coefficient set (the "skipped macroblock" case).
    const ZERO: Self = Self {
        y2_zigzag: [0; 16],
        y1_zigzag: [[0; 16]; 16],
        u_zigzag: [[0; 16]; 4],
        v_zigzag: [[0; 16]; 4],
    };

    /// True when every coefficient that would actually be coded is zero.
    ///
    /// For i16 macroblocks (`is_i4 == false`) the Y2 block is included and
    /// the Y1 blocks are checked from `first_coeff_y1` onward (their DC lives
    /// in Y2); for i4 macroblocks there is no Y2 block, so it is ignored.
    #[inline]
    fn is_all_zero(&self, is_i4: bool, first_coeff_y1: usize) -> bool {
        let zero = |coeffs: &[i32]| coeffs.iter().all(|&c| c == 0);
        (is_i4 || zero(&self.y2_zigzag))
            && self
                .y1_zigzag
                .iter()
                .all(|block| zero(&block[first_coeff_y1..]))
            && self.u_zigzag.iter().all(|block| zero(block))
            && self.v_zigzag.iter().all(|block| zero(block))
    }
}
/// Stateful VP8 (lossy WebP) frame encoder. Owns all per-frame and per-pass
/// encoding state and writes the finished bitstream into `writer`.
struct Vp8Encoder<'a> {
    /// Destination buffer for the final assembled bitstream.
    writer: &'a mut Vec<u8>,
    /// Source frame as YUV 4:2:0 planes plus header fields (filter level etc.).
    frame: Frame,
    /// Boolean coder for the compressed frame header (partition 0).
    encoder: ArithmeticEncoder,
    /// Per-segment quantizer / loop-filter parameters.
    segments: [Segment; MAX_SEGMENTS],
    segments_enabled: bool,
    /// Whether the per-MB segment map is (re)written in the header.
    segments_update_map: bool,
    /// Tree probabilities used to code each macroblock's segment id.
    segment_tree_probs: [Prob; 3],
    /// One segment id per macroblock; empty when segmentation is off.
    segment_map: Vec<u8>,
    loop_filter_adjustments: bool,
    /// Probability that a macroblock is *not* skipped; `None` = flag unused.
    macroblock_no_skip_coeff: Option<u8>,
    quantization_indices: QuantizationIndices,
    /// Coefficient token probabilities currently in effect.
    token_probs: TokenProbTables,
    /// Token statistics gathered while encoding, used to adapt probabilities.
    proba_stats: ProbaStats,
    /// Adapted probability tables computed from `proba_stats`, if any.
    updated_probs: Option<TokenProbTables>,
    /// Bit-cost tables derived from `token_probs` for mode decisions.
    level_costs: LevelCosts,
    /// Trellis quantization enabled (method >= 5).
    do_trellis: bool,
    /// Trellis also used for the i4 mode search (method >= 6).
    do_trellis_i4_mode: bool,
    // NOTE(review): presumably chroma quantization error diffusion (see
    // `top_derr`/`left_derr`) — confirm in the residuals module.
    do_error_diffusion: bool,
    /// Speed/quality method, 0..=6.
    method: u8,
    /// Spatial noise shaping strength, 0..=100.
    sns_strength: u8,
    filter_strength: u8,
    filter_sharpness: u8,
    /// Number of active segments, 1..=4.
    num_segments: u8,
    preset: super::api::Preset,
    /// Per-column context state for the macroblock row above the current one.
    top_complexity: Vec<Complexity>,
    /// Context state for the macroblock to the left.
    left_complexity: Complexity,
    /// Intra 4x4 prediction modes of the row above (4 entries per MB).
    top_b_pred: Vec<IntraMode>,
    left_b_pred: [IntraMode; 4],
    macroblock_width: u16,
    macroblock_height: u16,
    /// Residual token partition coders (currently always one).
    partitions: Vec<ArithmeticEncoder>,
    // Reconstructed border pixels used for intra prediction; the +1 entry is
    // the top-left corner pixel.
    left_border_y: [u8; 16 + 1],
    left_border_u: [u8; 8 + 1],
    left_border_v: [u8; 8 + 1],
    top_border_y: Vec<u8>,
    top_border_u: Vec<u8>,
    top_border_v: Vec<u8>,
    // Per-column / current-MB chroma error-diffusion state.
    top_derr: Vec<[[i8; 2]; 2]>,
    left_derr: [[i8; 2]; 2],
    /// Buffered residual tokens, emitted once probabilities are final.
    token_buffer: Option<residuals::TokenBuffer>,
    /// Per-macroblock mode decisions recorded during the pass.
    stored_mb_info: Vec<MacroblockInfo>,
    /// Quantized coefficients saved for multi-pass re-emission.
    stored_mb_coeffs: Vec<QuantizedMbCoeffs>,
}
impl<'a> Vp8Encoder<'a> {
/// Build an encoder that writes its output into `writer`, with every field
/// at its method-4 default; the real per-frame configuration happens later
/// in `encode_image` / `setup_encoding`.
fn new(writer: &'a mut Vec<u8>) -> Self {
    Self {
        writer,
        frame: Frame::default(),
        encoder: ArithmeticEncoder::new(),
        segments: core::array::from_fn(|_| Segment::default()),
        segments_enabled: false,
        segments_update_map: false,
        // All-255 tree probabilities until a real segment map is built.
        segment_tree_probs: [255; 3],
        segment_map: Vec::new(),
        loop_filter_adjustments: false,
        macroblock_no_skip_coeff: None,
        quantization_indices: QuantizationIndices::default(),
        token_probs: Default::default(),
        proba_stats: ProbaStats::new(),
        updated_probs: None,
        level_costs: LevelCosts::new(),
        do_trellis: true,
        do_trellis_i4_mode: false,
        do_error_diffusion: true,
        method: 4,
        sns_strength: 50,
        filter_strength: 60,
        filter_sharpness: 0,
        num_segments: 4,
        preset: super::api::Preset::Default,
        top_complexity: Vec::new(),
        left_complexity: Complexity::default(),
        top_b_pred: Vec::new(),
        left_b_pred: [IntraMode::default(); 4],
        macroblock_width: 0,
        macroblock_height: 0,
        partitions: vec![ArithmeticEncoder::new()],
        // 16/8 border pixels plus the top-left corner pixel.
        left_border_y: [0; 17],
        left_border_u: [0; 9],
        left_border_v: [0; 9],
        top_border_y: Vec::new(),
        top_border_u: Vec::new(),
        top_border_v: Vec::new(),
        top_derr: Vec::new(),
        left_derr: [[0; 2]; 2],
        token_buffer: None,
        stored_mb_info: Vec::new(),
        stored_mb_coeffs: Vec::new(),
    }
}
/// Segment parameters for macroblock (`mbx`, `mby`); falls back to segment 0
/// when segmentation is disabled or no map has been built.
#[inline]
fn get_segment_for_mb(&self, mbx: usize, mby: usize) -> &Segment {
    // Delegate the id lookup; `None` maps to the default segment 0.
    let id = self.get_segment_id_for_mb(mbx, mby).unwrap_or(0);
    &self.segments[id]
}
/// Segment id for macroblock (`mbx`, `mby`), or `None` when segmentation is
/// disabled or the segment map has not been built.
#[inline]
fn get_segment_id_for_mb(&self, mbx: usize, mby: usize) -> Option<usize> {
    // Guard clause: no segmentation means no id to report.
    if !self.segments_enabled || self.segment_map.is_empty() {
        return None;
    }
    let idx = mby * usize::from(self.macroblock_width) + mbx;
    Some(usize::from(self.segment_map[idx]))
}
/// Compare the accumulated token statistics against the default coefficient
/// probabilities and build the table the frame header will advertise.
/// Returns true when at least one probability actually differs from the
/// defaults.
fn compute_updated_probabilities(&mut self) -> bool {
    let mut updated = COEFF_PROBS;
    let mut has_changed = false;
    // Table dimensions: 4 plane types x 8 bands x 3 contexts x 11 tree nodes.
    for t in 0..4 {
        for b in 0..8 {
            for c in 0..3 {
                for p in 0..11 {
                    let default_prob = COEFF_PROBS[t][b][c][p];
                    let update_prob = COEFF_UPDATE_PROBS[t][b][c][p];
                    // NOTE(review): should_update presumably weighs the bit
                    // savings of the new probability against the cost of
                    // signalling it (it returns a savings estimate) — confirm
                    // in the cost module.
                    let (should_update, new_p, _savings) =
                        self.proba_stats
                            .should_update(t, b, c, p, default_prob, update_prob);
                    if should_update {
                        updated[t][b][c][p] = new_p;
                        has_changed |= new_p != default_prob;
                    }
                }
            }
        }
    }
    self.updated_probs = Some(updated);
    has_changed
}
/// Re-initialize all per-pass prediction, context, and error-diffusion state
/// and allocate fresh arithmetic coders for the next encoding pass.
// Improvement: the original reset each buffer with manual element loops;
// `slice::fill` expresses the same thing idiomatically and in one line each.
fn reset_for_new_pass(&mut self) {
    self.top_complexity.fill(Complexity::default());
    self.left_complexity = Complexity::default();
    self.top_b_pred.fill(IntraMode::default());
    self.left_b_pred = [IntraMode::default(); 4];
    // 129 (left) / 127 (top) are the off-frame border pixel values used
    // elsewhere in this file for intra prediction.
    self.left_border_y = [129u8; 16 + 1];
    self.left_border_u = [129u8; 8 + 1];
    self.left_border_v = [129u8; 8 + 1];
    self.top_border_y.fill(127);
    self.top_border_u.fill(127);
    self.top_border_v.fill(127);
    self.left_derr = [[0; 2]; 2];
    self.top_derr.fill([[0; 2]; 2]);
    let num_pixels =
        usize::from(self.macroblock_width) * usize::from(self.macroblock_height) * 256;
    // Initial token-partition capacity guess: one quarter of the pixel count.
    let estimated_partition_size = num_pixels / 4;
    self.partitions = vec![ArithmeticEncoder::with_capacity(estimated_partition_size)];
    self.encoder = ArithmeticEncoder::with_capacity(1024);
}
/// Encode one frame into `self.writer`: convert input pixels to YUV 4:2:0,
/// choose per-macroblock prediction modes, transform/quantize/tokenize the
/// residuals, then assemble the VP8 headers, macroblock records and the
/// token partition. Returns encoding statistics (PSNR, block-type counts).
///
/// # Errors
/// Propagates cancellation from `stop`/`progress` and fails when the
/// compressed header exceeds the VP8 partition-0 size limit.
#[allow(clippy::too_many_arguments)]
fn encode_image(
    &mut self,
    data: &[u8],
    color: PixelLayout,
    width: u16,
    height: u16,
    stride: usize,
    params: &super::api::EncoderParams,
    stop: &dyn enough::Stop,
    progress: &dyn super::api::EncodeProgress,
) -> Result<super::api::EncodeStats, EncodeError> {
    // Method 0..=6 trades speed for quality; trellis quantization at 5+.
    self.method = params.method.min(6);
    self.do_trellis = self.method >= 5;
    self.do_trellis_i4_mode = self.method >= 6;
    self.sns_strength = params.sns_strength.min(100);
    self.filter_strength = params.filter_strength.min(100);
    self.filter_sharpness = params.filter_sharpness.min(7);
    self.num_segments = params.num_segments.clamp(1, 4);
    self.preset = params.preset;
    // ARGB is repacked into a tight RGBA buffer first, so the conversion
    // paths below only need to know the RGBA layout. `stride` is counted in
    // pixels (it is multiplied by bytes-per-pixel wherever bytes are needed).
    let argb_converted;
    let (data, color) = if color == PixelLayout::Argb8 {
        let w = usize::from(width);
        let h = usize::from(height);
        let bpp = 4usize;
        let stride_bytes = stride * bpp;
        let row_bytes = w * bpp;
        let mut out = alloc::vec![0u8; w * h * 4];
        for y in 0..h {
            garb::bytes::argb_to_rgba(
                &data[y * stride_bytes..y * stride_bytes + row_bytes],
                &mut out[y * w * 4..(y + 1) * w * 4],
            )
            .expect("validated buffer sizes");
        }
        argb_converted = out;
        (argb_converted.as_slice(), PixelLayout::Rgba8)
    } else {
        (data, color)
    };
    // Obtain the Y/U/V planes: planar YUV passes through, everything else is
    // converted (optionally via the higher-quality "sharp YUV" path).
    let (y_bytes, u_bytes, v_bytes) = if color == PixelLayout::Yuv420 {
        let w = usize::from(width);
        let h = usize::from(height);
        let y_size = w * h;
        let uv_w = w.div_ceil(2);
        let uv_h = h.div_ceil(2);
        let uv_size = uv_w * uv_h;
        let y_plane = &data[..y_size];
        let u_plane = &data[y_size..y_size + uv_size];
        let v_plane = &data[y_size + uv_size..y_size + uv_size * 2];
        crate::decoder::yuv::import_yuv420_planes(y_plane, u_plane, v_plane, width, height)
    } else if params.use_sharp_yuv {
        convert_image_sharp_yuv(data, color, width, height, stride)
    } else {
        match color {
            PixelLayout::Rgb8 => convert_image_yuv::<3>(data, width, height, stride),
            PixelLayout::Rgba8 => convert_image_yuv::<4>(data, width, height, stride),
            PixelLayout::Bgr8 => {
                crate::decoder::yuv::convert_image_yuv_bgr::<3>(data, width, height, stride)
            }
            PixelLayout::Bgra8 => {
                crate::decoder::yuv::convert_image_yuv_bgr::<4>(data, width, height, stride)
            }
            PixelLayout::L8 => convert_image_y::<1>(data, width, height, stride),
            PixelLayout::La8 => convert_image_y::<2>(data, width, height, stride),
            // Both handled above (Argb8 was rewritten to Rgba8 already).
            PixelLayout::Yuv420 | PixelLayout::Argb8 => unreachable!(),
        }
    };
    // Sanity-check the input buffer size for packed-pixel layouts.
    if color != PixelLayout::Yuv420 {
        let bpp = match color {
            PixelLayout::L8 => 1usize,
            PixelLayout::La8 => 2,
            PixelLayout::Rgb8 | PixelLayout::Bgr8 => 3,
            PixelLayout::Rgba8 | PixelLayout::Bgra8 | PixelLayout::Argb8 => 4,
            PixelLayout::Yuv420 => unreachable!(),
        };
        let w = usize::from(width);
        let h = usize::from(height);
        // The last row only needs `w` pixels, not a full stride.
        let min_size = if h > 0 {
            stride * bpp * (h - 1) + w * bpp
        } else {
            0
        };
        assert!(
            data.len() >= min_size,
            "buffer too small: got {}, need at least {} for {}x{} stride={} {:?}",
            data.len(),
            min_size,
            w,
            h,
            stride,
            color
        );
    }
    self.setup_encoding(
        params.lossy_quality,
        width,
        height,
        y_bytes,
        u_bytes,
        v_bytes,
    );
    if self.level_costs.is_dirty() {
        self.level_costs.calculate(&self.token_probs);
    }
    let num_mb = usize::from(self.macroblock_width) * usize::from(self.macroblock_height);
    // Single pass for now; the loop below already supports more.
    let num_passes = 1;
    let mut final_sse_y: u64 = 0;
    let mut final_sse_u: u64 = 0;
    let mut final_sse_v: u64 = 0;
    let mut final_block_count_i4: u32 = 0;
    let mut final_block_count_i16: u32 = 0;
    let mut final_skip_mb: u32 = 0;
    for pass in 0..num_passes {
        let is_last_pass = pass == num_passes - 1;
        self.token_buffer = Some(residuals::TokenBuffer::with_estimated_capacity(num_mb));
        if pass == 0 || is_last_pass {
            self.proba_stats.reset();
        }
        self.stored_mb_info.clear();
        self.stored_mb_info.reserve(num_mb);
        if num_passes > 1 {
            self.stored_mb_coeffs.clear();
            self.stored_mb_coeffs.reserve(num_mb);
        }
        // Later passes re-encode with the probabilities adapted in the
        // previous pass, so the bit-cost tables must be recomputed.
        if pass > 0 {
            if let Some(ref updated) = self.updated_probs {
                self.token_probs = *updated;
            }
            self.level_costs.mark_dirty();
            self.level_costs.calculate(&self.token_probs);
        }
        self.reset_for_new_pass();
        // Refresh the adapted probability tables periodically mid-pass.
        let max_count = (num_mb / 8).max(96) as i32;
        let mut refresh_countdown = max_count;
        let mut total_mb: u32 = 0;
        let mut skip_mb: u32 = 0;
        let mut block_count_i4: u32 = 0;
        let mut block_count_i16: u32 = 0;
        let mut sse_y: u64 = 0;
        let mut sse_u: u64 = 0;
        let mut sse_v: u64 = 0;
        let y_stride = usize::from(self.macroblock_width) * 16;
        let uv_stride = usize::from(self.macroblock_width) * 8;
        let mut last_progress_pct: u8 = 0;
        for mby in 0..self.macroblock_height {
            // Reset the left-edge context at the start of each MB row.
            self.left_complexity = Complexity::default();
            self.left_b_pred = [IntraMode::default(); 4];
            self.left_derr = [[0; 2]; 2];
            self.left_border_y = [129u8; 16 + 1];
            self.left_border_u = [129u8; 8 + 1];
            self.left_border_v = [129u8; 8 + 1];
            for mbx in 0..self.macroblock_width {
                // Poll for cancellation every 16 macroblocks.
                if total_mb & 15 == 0 {
                    stop.check()?;
                }
                refresh_countdown -= 1;
                if refresh_countdown < 0 {
                    self.compute_updated_probabilities();
                    refresh_countdown = max_count;
                }
                let macroblock_info = self.choose_macroblock_info(mbx.into(), mby.into());
                let mbx_usize = usize::from(mbx);
                // Record this MB's bottom row / right column of 4x4 modes as
                // the top/left prediction context for its neighbors.
                if let Some(bpred) = macroblock_info.luma_bpred {
                    for x in 0..4 {
                        self.top_b_pred[mbx_usize * 4 + x] = bpred[3 * 4 + x];
                    }
                    for y in 0..4 {
                        self.left_b_pred[y] = bpred[y * 4 + 3];
                    }
                } else {
                    let intra_mode = macroblock_info
                        .luma_mode
                        .into_intra()
                        .unwrap_or(IntraMode::DC);
                    for x in 0..4 {
                        self.top_b_pred[mbx_usize * 4 + x] = intra_mode;
                    }
                    for y in 0..4 {
                        self.left_b_pred[y] = intra_mode;
                    }
                }
                let y_block_data =
                    self.transform_luma_block(mbx.into(), mby.into(), &macroblock_info);
                let (u_block_data, v_block_data) = self.transform_chroma_blocks(
                    mbx.into(),
                    mby.into(),
                    macroblock_info.chroma_mode,
                );
                // Distortion bookkeeping (source vs. prediction) for PSNR.
                sse_y += u64::from(sse_16x16_luma(
                    &self.frame.ybuf,
                    y_stride,
                    usize::from(mbx),
                    usize::from(mby),
                    &y_block_data.pred_block,
                ));
                sse_u += u64::from(sse_8x8_chroma(
                    &self.frame.ubuf,
                    uv_stride,
                    usize::from(mbx),
                    usize::from(mby),
                    &u_block_data.pred_block,
                ));
                sse_v += u64::from(sse_8x8_chroma(
                    &self.frame.vbuf,
                    uv_stride,
                    usize::from(mbx),
                    usize::from(mby),
                    &v_block_data.pred_block,
                ));
                if macroblock_info.luma_mode == LumaMode::B {
                    block_count_i4 += 1;
                } else {
                    block_count_i16 += 1;
                }
                total_mb += 1;
                // For i16 macroblocks the luma DC lives in the Y2 block, so
                // Y1 coding starts at coefficient 1.
                let is_i4 = macroblock_info.luma_mode == LumaMode::B;
                let first_coeff_y1 = if is_i4 { 0usize } else { 1 };
                let mut mb_info = macroblock_info;
                let store_coeffs = num_passes > 1;
                if self.do_trellis {
                    let all_zero = self.check_all_coeffs_zero(
                        &macroblock_info,
                        &y_block_data.coeffs,
                        &u_block_data.coeffs,
                        &v_block_data.coeffs,
                    );
                    if all_zero {
                        // Fully-zero MB: mark as skipped and clear the
                        // neighbor contexts (the Y2 context is kept for i4).
                        skip_mb += 1;
                        mb_info.coeffs_skipped = true;
                        self.left_complexity
                            .clear(macroblock_info.luma_mode != LumaMode::B);
                        self.top_complexity[usize::from(mbx)]
                            .clear(macroblock_info.luma_mode != LumaMode::B);
                        if store_coeffs {
                            self.stored_mb_coeffs.push(QuantizedMbCoeffs::ZERO);
                        }
                    } else {
                        let stored_coeffs = self.record_residual_tokens_storing(
                            &macroblock_info,
                            mbx as usize,
                            &y_block_data.coeffs,
                            &u_block_data.coeffs,
                            &v_block_data.coeffs,
                        );
                        if store_coeffs {
                            self.stored_mb_coeffs.push(stored_coeffs);
                        }
                    }
                } else {
                    // Non-trellis path: quantize first, then decide skip.
                    let stored_coeffs = self.quantize_mb_coeffs(
                        &macroblock_info,
                        &y_block_data.coeffs,
                        &u_block_data.coeffs,
                        &v_block_data.coeffs,
                    );
                    let all_zero = stored_coeffs.is_all_zero(is_i4, first_coeff_y1);
                    if all_zero {
                        skip_mb += 1;
                        mb_info.coeffs_skipped = true;
                        self.left_complexity
                            .clear(macroblock_info.luma_mode != LumaMode::B);
                        self.top_complexity[usize::from(mbx)]
                            .clear(macroblock_info.luma_mode != LumaMode::B);
                    } else {
                        self.record_from_stored_coeffs(
                            &macroblock_info,
                            mbx as usize,
                            &stored_coeffs,
                        );
                    }
                    if store_coeffs {
                        self.stored_mb_coeffs.push(stored_coeffs);
                    }
                }
                self.stored_mb_info.push(mb_info);
            }
            // Row-granular progress; 100 is reserved for final completion.
            let pct = ((u32::from(mby) + 1) * 100 / u32::from(self.macroblock_height)) as u8;
            if pct > last_progress_pct {
                last_progress_pct = pct;
                progress.on_progress(pct.min(99))?;
            }
        }
        // Probability that a macroblock is *not* skipped, for the header.
        if total_mb > 0 {
            let non_skip_mb = total_mb - skip_mb;
            let prob = ((255 * non_skip_mb + total_mb / 2) / total_mb).min(255) as u8;
            self.macroblock_no_skip_coeff = Some(prob.clamp(1, 254));
        }
        self.compute_updated_probabilities();
        final_sse_y = sse_y;
        final_sse_u = sse_u;
        final_sse_v = sse_v;
        final_block_count_i4 = block_count_i4;
        final_block_count_i16 = block_count_i16;
        final_skip_mb = skip_mb;
    }
    self.encode_compressed_frame_header();
    // Second walk over the stored per-MB decisions: write the mode records
    // into the compressed header partition with fresh prediction contexts.
    let stored_mb_info = mem::take(&mut self.stored_mb_info);
    for pred in self.top_b_pred.iter_mut() {
        *pred = IntraMode::default();
    }
    self.left_b_pred = [IntraMode::default(); 4];
    let mb_w = usize::from(self.macroblock_width);
    for (idx, mb_info) in stored_mb_info.iter().enumerate() {
        let mbx = idx % mb_w;
        if mbx == 0 {
            self.left_b_pred = [IntraMode::default(); 4];
        }
        self.write_macroblock_header(mb_info, mbx);
    }
    // Emit the buffered residual tokens using the final probability tables.
    let final_probs = self.updated_probs.as_ref().unwrap_or(&self.token_probs);
    let token_buf = self.token_buffer.take().unwrap();
    token_buf.emit_tokens(&mut self.partitions[0], final_probs);
    let compressed_header_encoder = mem::take(&mut self.encoder);
    let compressed_header_bytes = compressed_header_encoder.flush_and_get_buffer();
    // The frame tag stores partition 0's size in a 19-bit field.
    const VP8_MAX_PARTITION0_SIZE: u32 = (1 << 19) - 1;
    let partition0_size = compressed_header_bytes.len() as u32;
    if partition0_size > VP8_MAX_PARTITION0_SIZE {
        return Err(at!(EncodeError::Partition0Overflow {
            size: partition0_size,
            max: VP8_MAX_PARTITION0_SIZE,
        })
        .into());
    }
    self.write_uncompressed_frame_header(partition0_size);
    self.writer.write_all(&compressed_header_bytes);
    self.write_partitions();
    self.stored_mb_info.clear();
    // PSNR computed over padded macroblock dimensions (16/8-px multiples).
    let num_pixels_y =
        u64::from(self.macroblock_width) * 16 * u64::from(self.macroblock_height) * 16;
    let num_pixels_uv =
        u64::from(self.macroblock_width) * 8 * u64::from(self.macroblock_height) * 8;
    let psnr_y = sse_to_psnr(final_sse_y, num_pixels_y);
    let psnr_u = sse_to_psnr(final_sse_u, num_pixels_uv);
    let psnr_v = sse_to_psnr(final_sse_v, num_pixels_uv);
    let total_sse = final_sse_y + final_sse_u + final_sse_v;
    let total_pixels = num_pixels_y + 2 * num_pixels_uv;
    let psnr_all = sse_to_psnr(total_sse, total_pixels);
    let mut stats = super::api::EncodeStats {
        psnr: [psnr_y, psnr_u, psnr_v, psnr_all, 0.0],
        block_count_i4: final_block_count_i4,
        block_count_i16: final_block_count_i16,
        block_count_skip: final_skip_mb,
        ..Default::default()
    };
    for (i, segment) in self.segments.iter().enumerate().take(4) {
        stats.segment_quant[i] = segment.quant_index;
        stats.segment_level[i] = self.frame.filter_level;
    }
    progress.on_progress(100)?;
    Ok(stats)
}
/// Merge segments that ended up with identical quantizer and loop-filter
/// levels: remap the per-MB segment map onto the surviving segments and
/// shrink `num_segments`. Tail slots are filled with a copy of the last
/// surviving segment.
#[allow(clippy::needless_range_loop)]
fn simplify_segments(&mut self) {
    // seg_map[old_id] -> new_id after deduplication.
    let mut seg_map = [0u8, 1, 2, 3];
    let num_segments = self.num_segments as usize;
    let mut num_final_segments = 1usize;
    for s1 in 1..num_segments {
        let seg1 = &self.segments[s1];
        let mut found = false;
        // Look for an already-kept segment with the same parameters.
        for s2 in 0..num_final_segments {
            let seg2 = &self.segments[s2];
            if seg1.quant_index == seg2.quant_index
                && seg1.loopfilter_level == seg2.loopfilter_level
            {
                seg_map[s1] = s2 as u8;
                found = true;
                break;
            }
        }
        if !found {
            // Keep this segment, compacting it into the next free slot.
            seg_map[s1] = num_final_segments as u8;
            if num_final_segments != s1 {
                self.segments[num_final_segments] = self.segments[s1].clone();
            }
            num_final_segments += 1;
        }
    }
    if num_final_segments < num_segments {
        for seg_id in &mut self.segment_map {
            *seg_id = seg_map[*seg_id as usize];
        }
        self.num_segments = num_final_segments as u8;
        for i in num_final_segments..num_segments {
            self.segments[i] = self.segments[num_final_segments - 1].clone();
        }
    }
}
/// Analyze the frame, optionally auto-tune the encoder from the detected
/// content type, cluster macroblocks into quantization segments (k-means
/// over the per-MB "alpha" histogram), then derive per-segment quantizers,
/// loop-filter deltas, and the segment-tree probabilities.
fn analyze_and_assign_segments(&mut self, base_quant_index: u8, quality: u8) {
    let y_stride = usize::from(self.macroblock_width * 16);
    let uv_stride = usize::from(self.macroblock_width * 8);
    let width = usize::from(self.frame.width);
    let height = usize::from(self.frame.height);
    let analysis = analyze_image(
        &self.frame.ybuf,
        &self.frame.ubuf,
        &self.frame.vbuf,
        width,
        height,
        y_stride,
        uv_stride,
        self.method,
        self.sns_strength,
    );
    // Preset::Auto: classify the image and replace the manual tuning knobs
    // with content-derived values.
    if self.preset == super::api::Preset::Auto {
        let content_type = classify_image_type(
            &self.frame.ybuf,
            width,
            height,
            y_stride,
            &analysis.alpha_histogram,
        );
        let (sns, filter, sharp, segs) = content_type_to_tuning(content_type);
        self.sns_strength = sns;
        self.filter_strength = filter;
        self.filter_sharpness = sharp;
        self.num_segments = segs;
    }
    let (centers, alpha_to_segment, mid_alpha) =
        assign_segments_kmeans(&analysis.alpha_histogram, usize::from(self.num_segments));
    let min_center = centers.iter().copied().min().unwrap_or(0) as i32;
    let max_center = centers.iter().copied().max().unwrap_or(255) as i32;
    // Guard against a degenerate single-center clustering.
    let range = if max_center == min_center {
        1
    } else {
        max_center - min_center
    };
    const MIN_ALPHA_RANGE: i32 = 64;
    let effective_range = range.max(MIN_ALPHA_RANGE);
    self.segment_map = analysis
        .mb_alphas
        .iter()
        .map(|&alpha| alpha_to_segment[alpha as usize])
        .collect();
    if self.num_segments > 1 {
        super::cost::smooth_segment_map(
            &mut self.segment_map,
            usize::from(self.macroblock_width),
            usize::from(self.macroblock_height),
        );
    }
    let sns_strength = self.sns_strength;
    // Map average chroma activity into small UV quantizer deltas, scaled by
    // the SNS strength.
    const MID_UV_ALPHA: i32 = 64;
    const MIN_UV_ALPHA: i32 = 30;
    const MAX_UV_ALPHA: i32 = 100;
    const MAX_DQ_UV: i32 = 6;
    const MIN_DQ_UV: i32 = -4;
    let dq_uv_ac = (analysis.uv_alpha_avg - MID_UV_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
        / (MAX_UV_ALPHA - MIN_UV_ALPHA);
    let dq_uv_ac = (dq_uv_ac * i32::from(sns_strength) / 100).clamp(MIN_DQ_UV, MAX_DQ_UV);
    let dq_uv_dc = (-4 * i32::from(sns_strength) / 100).clamp(-15, 15);
    if dq_uv_dc != 0 {
        self.quantization_indices.uvdc_delta = Some(dq_uv_dc as i8);
    }
    if dq_uv_ac != 0 {
        self.quantization_indices.uvac_delta = Some(dq_uv_ac as i8);
    }
    let base_filter = super::cost::compute_filter_level(
        base_quant_index,
        self.filter_sharpness,
        self.filter_strength,
    );
    for (seg_idx, &center) in centers.iter().enumerate() {
        let center = center as i32;
        // Alpha recentered around the clustering midpoint, in [-127, 127].
        let transformed_alpha = (255 * (center - mid_alpha) / effective_range).clamp(-127, 127);
        let beta = (255 * (center - min_center) / effective_range).clamp(0, 255) as u8;
        let seg_quant_index = compute_segment_quant(quality, transformed_alpha, sns_strength);
        let seg_quant_usize = seg_quant_index as usize;
        // Header deltas are relative to the frame-level base quant / filter.
        let delta = seg_quant_index as i8 - base_quant_index as i8;
        let seg_filter = super::cost::compute_filter_level_with_beta(
            seg_quant_index,
            self.filter_sharpness,
            self.filter_strength,
            beta,
        );
        let filter_delta = (seg_filter as i8) - (base_filter as i8);
        let uv_dc_idx = (seg_quant_usize as i32 + dq_uv_dc).clamp(0, 117) as usize;
        let uv_ac_idx = (seg_quant_usize as i32 + dq_uv_ac).clamp(0, 127) as usize;
        let mut segment = Segment {
            ydc: DC_QUANT[seg_quant_usize],
            yac: AC_QUANT[seg_quant_usize],
            y2dc: DC_QUANT[seg_quant_usize] * 2,
            // Y2 AC is boosted to 155% of the luma AC step, floored at 8.
            y2ac: ((i32::from(AC_QUANT[seg_quant_usize]) * 155 / 100) as i16).max(8),
            uvdc: DC_QUANT[uv_dc_idx],
            uvac: AC_QUANT[uv_ac_idx],
            quantizer_level: delta,
            loopfilter_level: filter_delta,
            quant_index: seg_quant_index,
            ..Default::default()
        };
        segment.init_matrices(self.sns_strength, self.method);
        self.segments[seg_idx] = segment;
    }
    if self.num_segments > 1 {
        self.simplify_segments();
    }
    // Segment-id tree probabilities from the map's segment frequencies.
    let mut seg_counts = [0u32; 4];
    for &seg_id in &self.segment_map {
        seg_counts[seg_id as usize] += 1;
    }
    let get_proba = |a: u32, b: u32| -> u8 {
        let total = a + b;
        if total == 0 {
            255
        } else {
            ((255 * a + total / 2) / total) as u8
        }
    };
    self.segment_tree_probs[0] =
        get_proba(seg_counts[0] + seg_counts[1], seg_counts[2] + seg_counts[3]);
    self.segment_tree_probs[1] = get_proba(seg_counts[0], seg_counts[1]);
    self.segment_tree_probs[2] = get_proba(seg_counts[2], seg_counts[3]);
    // An all-255 tree carries no information, so skip writing the map then.
    let should_update_map = self.segment_tree_probs[0] != 255
        || self.segment_tree_probs[1] != 255
        || self.segment_tree_probs[2] != 255;
    self.segments_enabled = true;
    self.segments_update_map = should_update_map;
    // Reset the left borders to the off-frame value for the coming pass.
    self.left_border_y = [129u8; 16 + 1];
    self.left_border_u = [129u8; 8 + 1];
    self.left_border_v = [129u8; 8 + 1];
}
/// Initialize per-frame encoding state for the requested quality: frame
/// buffers, default probabilities, per-segment quantizers, the optional
/// segmentation analysis, and all prediction-border buffers.
///
/// # Panics
/// Panics if `lossy_quality > 100`.
fn setup_encoding(
    &mut self,
    lossy_quality: u8,
    width: u16,
    height: u16,
    y_buf: Vec<u8>,
    u_buf: Vec<u8>,
    v_buf: Vec<u8>,
) {
    if lossy_quality > 100 {
        panic!("lossy quality must be between 0 and 100");
    }
    let quant_index: u8 = quality_to_quant_index(lossy_quality);
    let quant_index_usize: usize = quant_index as usize;
    let mb_width = width.div_ceil(16);
    let mb_height = height.div_ceil(16);
    self.macroblock_width = mb_width;
    self.macroblock_height = mb_height;
    let filter_level = super::cost::compute_filter_level(
        quant_index,
        self.filter_sharpness,
        self.filter_strength,
    );
    self.frame = Frame {
        width,
        height,
        ybuf: y_buf,
        ubuf: u_buf,
        vbuf: v_buf,
        version: 0,
        for_display: true,
        pixel_type: 0,
        filter_type: false,
        filter_level,
        sharpness_level: self.filter_sharpness,
    };
    self.top_complexity = vec![Complexity::default(); usize::from(mb_width)];
    self.top_b_pred = vec![IntraMode::default(); 4 * usize::from(mb_width)];
    self.left_b_pred = [IntraMode::default(); 4];
    self.token_probs = COEFF_PROBS;
    // Initial "has coefficients" probability; refined per pass.
    self.macroblock_no_skip_coeff = Some(200);
    let quantization_indices = QuantizationIndices {
        yac_abs: quant_index,
        ..Default::default()
    };
    self.quantization_indices = quantization_indices;
    // Start all four segments at the frame-level quantizer.
    for seg_idx in 0..4 {
        let mut segment = Segment {
            ydc: DC_QUANT[quant_index_usize],
            yac: AC_QUANT[quant_index_usize],
            y2dc: DC_QUANT[quant_index_usize] * 2,
            // Y2 AC is boosted to 155% of the luma AC step, floored at 8.
            y2ac: ((i32::from(AC_QUANT[quant_index_usize]) * 155 / 100) as i16).max(8),
            uvdc: DC_QUANT[quant_index_usize],
            uvac: AC_QUANT[quant_index_usize],
            quantizer_level: 0,
            quant_index,
            ..Default::default()
        };
        segment.init_matrices(self.sns_strength, self.method);
        self.segments[seg_idx] = segment;
    }
    // Segmentation only pays off on reasonably large images (>= 256 MBs).
    let total_mbs = usize::from(mb_width) * usize::from(mb_height);
    let use_segments = self.num_segments > 1 && total_mbs >= 256;
    if use_segments {
        self.analyze_and_assign_segments(quant_index, lossy_quality);
        // The Auto preset may have changed the filter knobs during analysis,
        // so recompute the frame-level filter from the new values.
        if self.preset == super::api::Preset::Auto {
            let new_filter = super::cost::compute_filter_level(
                quant_index,
                self.filter_sharpness,
                self.filter_strength,
            );
            self.frame.filter_level = new_filter;
            self.frame.sharpness_level = self.filter_sharpness;
        }
    } else {
        self.segments_enabled = false;
        self.segments_update_map = false;
        self.segment_map = Vec::new();
    }
    // Prediction borders: 129 for off-frame left pixels, 127 for top.
    // NOTE(review): the +4 extra top luma pixels are presumably for
    // top-right B_PRED access — confirm in the prediction module.
    self.left_border_y = [129u8; 16 + 1];
    self.left_border_u = [129u8; 8 + 1];
    self.left_border_v = [129u8; 8 + 1];
    self.top_border_y = vec![127u8; usize::from(self.macroblock_width) * 16 + 4];
    self.top_border_u = vec![127u8; usize::from(self.macroblock_width) * 8];
    self.top_border_v = vec![127u8; usize::from(self.macroblock_width) * 8];
    self.top_derr = vec![[[0i8; 2]; 2]; usize::from(self.macroblock_width)];
    self.left_derr = [[0; 2]; 2];
}
}
/// Convert a sum of squared errors over `num_pixels` 8-bit samples into PSNR
/// in dB. Zero error or an empty plane yields the conventional 99.0 cap.
fn sse_to_psnr(sse: u64, num_pixels: u64) -> f32 {
    if sse == 0 || num_pixels == 0 {
        return 99.0;
    }
    let mse = sse as f64 / num_pixels as f64;
    // Peak signal squared for 8-bit samples.
    const PEAK_SQ: f64 = 255.0 * 255.0;
    (10.0 * libm::log10(PEAK_SQ / mse)) as f32
}
/// Top-level lossy encoding entry point: validates dimensions, then either
/// runs a target-size search, a target-PSNR search, or a single direct
/// encode, depending on `params`.
#[allow(clippy::too_many_arguments)]
pub(crate) fn encode_frame_lossy(
    writer: &mut Vec<u8>,
    data: &[u8],
    width: u32,
    height: u32,
    stride: usize,
    color: PixelLayout,
    params: &super::api::EncoderParams,
    stop: &dyn enough::Stop,
    progress: &dyn super::api::EncodeProgress,
) -> super::api::EncodeResult<super::api::EncodeStats> {
    // Dimensions must fit in u16; anything larger is rejected up front.
    let width: u16 = width
        .try_into()
        .map_err(|_| at!(EncodeError::InvalidDimensions))?;
    let height: u16 = height
        .try_into()
        .map_err(|_| at!(EncodeError::InvalidDimensions))?;
    let stats = if params.target_size > 0 {
        encode_with_quality_search(
            writer, data, width, height, stride, color, params, stop, progress,
        )?
    } else if params.target_psnr > 0.0 {
        encode_with_psnr_search(
            writer, data, width, height, stride, color, params, stop, progress,
        )?
    } else {
        Vp8Encoder::new(writer)
            .encode_image(data, color, width, height, stride, params, stop, progress)?
    };
    Ok(stats)
}
/// Encode repeatedly, searching over quality for an output size close to
/// `params.target_size`; the closest attempt is written to `writer`.
///
/// The loop order is deliberate and must be preserved: store the pass result
/// in `pass_stats.value`, track the best attempt, check convergence, and
/// only then step the quality for the next pass.
#[allow(clippy::too_many_arguments)]
fn encode_with_quality_search(
    writer: &mut Vec<u8>,
    data: &[u8],
    width: u16,
    height: u16,
    stride: usize,
    color: PixelLayout,
    params: &super::api::EncoderParams,
    stop: &dyn enough::Stop,
    progress: &dyn super::api::EncodeProgress,
) -> Result<super::api::EncodeStats, EncodeError> {
    // Quality searched within [1, 100], starting from the requested quality.
    let mut pass_stats = PassStats::new_for_size(params.target_size, params.lossy_quality, 1, 100);
    // Slower methods get more refinement passes, with a floor of 6.
    let max_passes = (params.method + 3).max(6) as usize;
    let mut best_output: Option<Vec<u8>> = None;
    let mut best_enc_stats = super::api::EncodeStats::default();
    let mut best_diff = f64::MAX;
    for pass in 0..max_passes {
        stop.check()?;
        // Each trial encodes into a scratch buffer; only the best is kept.
        let mut trial_buffer = Vec::new();
        let mut trial_encoder = Vp8Encoder::new(&mut trial_buffer);
        let mut trial_params = params.clone();
        trial_params.lossy_quality = libm::roundf(pass_stats.q).clamp(0.0, 100.0) as u8;
        let enc_stats = trial_encoder.encode_image(
            data,
            color,
            width,
            height,
            stride,
            &trial_params,
            stop,
            progress,
        )?;
        let output_size = trial_buffer.len() as f64;
        pass_stats.value = output_size;
        let diff = (output_size - pass_stats.target).abs();
        if diff < best_diff {
            best_diff = diff;
            best_enc_stats = enc_stats;
            best_output = Some(trial_buffer);
        }
        let is_last = pass + 1 >= max_passes || pass_stats.is_converged();
        if is_last {
            break;
        }
        pass_stats.compute_next_q();
    }
    if let Some(output) = best_output {
        writer.extend_from_slice(&output);
    }
    Ok(best_enc_stats)
}
/// Encode repeatedly, searching over quality for an overall PSNR close to
/// `params.target_psnr`; the closest attempt is written to `writer`.
///
/// The loop order is deliberate and must be preserved: store the pass result
/// in `pass_stats.value`, track the best attempt, check convergence, and
/// only then step the quality for the next pass.
#[allow(clippy::too_many_arguments)]
fn encode_with_psnr_search(
    writer: &mut Vec<u8>,
    data: &[u8],
    width: u16,
    height: u16,
    stride: usize,
    color: PixelLayout,
    params: &super::api::EncoderParams,
    stop: &dyn enough::Stop,
    progress: &dyn super::api::EncodeProgress,
) -> Result<super::api::EncodeStats, EncodeError> {
    // Quality searched within [1, 100], starting from the requested quality.
    let mut pass_stats = PassStats::new_for_psnr(params.target_psnr, params.lossy_quality, 1, 100);
    // Slower methods get more refinement passes, with a floor of 6.
    let max_passes = (params.method + 3).max(6) as usize;
    let mut best_output: Option<Vec<u8>> = None;
    let mut best_enc_stats = super::api::EncodeStats::default();
    let mut best_diff = f64::MAX;
    for pass in 0..max_passes {
        stop.check()?;
        // Each trial encodes into a scratch buffer; only the best is kept.
        let mut trial_buffer = Vec::new();
        let mut trial_encoder = Vp8Encoder::new(&mut trial_buffer);
        let mut trial_params = params.clone();
        trial_params.lossy_quality = libm::roundf(pass_stats.q).clamp(0.0, 100.0) as u8;
        let enc_stats = trial_encoder.encode_image(
            data,
            color,
            width,
            height,
            stride,
            &trial_params,
            stop,
            progress,
        )?;
        // psnr[3] is the combined all-planes PSNR (see `encode_image`).
        let psnr_value = f64::from(enc_stats.psnr[3]);
        pass_stats.value = psnr_value;
        let diff = (psnr_value - pass_stats.target).abs();
        if diff < best_diff {
            best_diff = diff;
            best_enc_stats = enc_stats;
            best_output = Some(trial_buffer);
        }
        let is_last = pass + 1 >= max_passes || pass_stats.is_converged();
        if is_last {
            break;
        }
        pass_stats.compute_next_q();
    }
    if let Some(output) = best_output {
        writer.extend_from_slice(&output);
    }
    Ok(best_enc_stats)
}