1#![allow(non_camel_case_types)]
12
13use std::fmt;
14use std::mem::MaybeUninit;
15
16use arrayvec::*;
17use itertools::izip;
18
19use crate::api::*;
20use crate::cdef::*;
21use crate::context::*;
22use crate::cpu_features::CpuFeatureLevel;
23use crate::deblock::*;
24use crate::dist::*;
25use crate::ec::{Writer, WriterCounter, OD_BITRES};
26use crate::encode_block_with_modes;
27use crate::encoder::{FrameInvariants, IMPORTANCE_BLOCK_SIZE};
28use crate::frame::*;
29use crate::header::ReferenceMode;
30use crate::lrf::*;
31use crate::mc::MotionVector;
32use crate::me::estimate_motion;
33use crate::me::MVSamplingMode;
34use crate::me::MotionSearchResult;
35use crate::motion_compensate;
36use crate::partition::PartitionType::*;
37use crate::partition::RefType::*;
38use crate::partition::*;
39use crate::predict::{
40 luma_ac, AngleDelta, IntraEdgeFilterParameters, IntraParam, PredictionMode,
41 RAV1E_INTER_COMPOUND_MODES, RAV1E_INTER_MODES_MINIMAL, RAV1E_INTRA_MODES,
42};
43use crate::rdo_tables::*;
44use crate::tiling::*;
45use crate::transform::{TxSet, TxSize, TxType, RAV1E_TX_TYPES};
46use crate::util::{init_slice_repeat_mut, Aligned, Pixel};
47use crate::write_tx_blocks;
48use crate::write_tx_tree;
49use crate::Tune;
50use crate::{encode_block_post_cdef, encode_block_pre_cdef};
51
/// Selects which distortion/rate measurement strategy RDO uses.
///
/// `rdo_mode_decision` picks `TxDistEstRate` when
/// `fi.use_tx_domain_rate` is set, `TxDistRealRate` when only
/// `fi.use_tx_domain_distortion` is set, and `PixelDistRealRate` otherwise.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum RDOType {
  PixelDistRealRate,
  TxDistRealRate,
  TxDistEstRate,
}

impl RDOType {
  /// Returns `true` for both `TxDist*` variants and `false` for
  /// `PixelDistRealRate`.
  #[inline]
  pub const fn needs_tx_dist(self) -> bool {
    match self {
      Self::TxDistRealRate | Self::TxDistEstRate => true,
      Self::PixelDistRealRate => false,
    }
  }

  /// Returns `true` for both `*RealRate` variants and `false` for
  /// `TxDistEstRate`.
  #[inline]
  pub const fn needs_coeff_rate(self) -> bool {
    match self {
      Self::PixelDistRealRate | Self::TxDistRealRate => true,
      Self::TxDistEstRate => false,
    }
  }
}
80
/// An RD cost together with a partition type and the parameters of up to
/// four blocks.
// NOTE(review): presumably the outcome of evaluating one partition type for a
// block, with one `PartitionParameters` per sub-block — confirm against the
// partition search code, which is outside this part of the file.
#[derive(Clone)]
pub struct PartitionGroupParameters {
  /// RD cost associated with this group.
  pub rd_cost: f64,
  /// Partition type this group describes.
  pub part_type: PartitionType,
  /// Per-block parameters; the container holds at most four entries.
  pub part_modes: ArrayVec<PartitionParameters, 4>,
}
87
/// The coding decisions chosen for one block, as produced by
/// `rdo_mode_decision`, together with the RD cost of that choice.
#[derive(Clone, Debug)]
pub struct PartitionParameters {
  /// RD cost of the chosen candidate (see `compute_rd_cost`).
  pub rd_cost: f64,
  /// Offset of the block inside the tile.
  pub bo: TileBlockOffset,
  /// Size of the block.
  pub bsize: BlockSize,
  /// Chosen luma prediction mode.
  pub pred_mode_luma: PredictionMode,
  /// Chosen chroma prediction mode.
  pub pred_mode_chroma: PredictionMode,
  /// CfL parameters; only updated by `rdo_mode_decision` when the
  /// `UV_CFL_PRED` candidate wins.
  pub pred_cfl_params: CFLParams,
  /// Angle deltas that accompany the chosen prediction modes.
  pub angle_delta: AngleDelta,
  /// Reference frames of the chosen candidate.
  pub ref_frames: [RefType; 2],
  /// Motion vectors of the chosen candidate, one per reference frame slot.
  pub mvs: [MotionVector; 2],
  /// Whether the chosen candidate was evaluated with `skip` set.
  pub skip: bool,
  /// First value returned by `encode_block_post_cdef` for the chosen
  /// candidate.
  // NOTE(review): presumably "at least one coefficient is coded" — confirm.
  pub has_coeff: bool,
  /// Chosen transform size.
  pub tx_size: TxSize,
  /// Chosen transform type.
  pub tx_type: TxType,
  /// Segmentation index the chosen candidate was evaluated with.
  pub sidx: u8,
}
105
106impl Default for PartitionParameters {
107 fn default() -> Self {
108 PartitionParameters {
109 rd_cost: f64::MAX,
110 bo: TileBlockOffset::default(),
111 bsize: BlockSize::BLOCK_32X32,
112 pred_mode_luma: PredictionMode::default(),
113 pred_mode_chroma: PredictionMode::default(),
114 pred_cfl_params: CFLParams::default(),
115 angle_delta: AngleDelta::default(),
116 ref_frames: [RefType::INTRA_FRAME, RefType::NONE_FRAME],
117 mvs: [MotionVector::default(); 2],
118 skip: false,
119 has_coeff: true,
120 tx_size: TxSize::TX_4X4,
121 tx_type: TxType::DCT_DCT,
122 sidx: 0,
123 }
124 }
125}
126
127pub fn estimate_rate(qindex: u8, ts: TxSize, fast_distortion: u64) -> u64 {
128 let bs_index = ts as usize;
129 let q_bin_idx = (qindex as usize) / RDO_QUANT_DIV;
130 let bin_idx_down =
131 ((fast_distortion) / RATE_EST_BIN_SIZE).min((RDO_NUM_BINS - 2) as u64);
132 let bin_idx_up = (bin_idx_down + 1).min((RDO_NUM_BINS - 1) as u64);
133 let x0 = (bin_idx_down * RATE_EST_BIN_SIZE) as i64;
134 let x1 = (bin_idx_up * RATE_EST_BIN_SIZE) as i64;
135 let y0 = RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_down as usize] as i64;
136 let y1 = RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_up as usize] as i64;
137 let slope = ((y1 - y0) << 8) / (x1 - x0);
138 (y0 + (((fast_distortion as i64 - x0) * slope) >> 8)).max(0) as u64
139}
140
141#[allow(unused)]
142pub fn cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>(
143 src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
144 bit_depth: usize, compute_bias: F, cpu: CpuFeatureLevel,
145) -> Distortion {
146 debug_assert!(src1.plane_cfg.xdec == 0);
147 debug_assert!(src1.plane_cfg.ydec == 0);
148 debug_assert!(src2.plane_cfg.xdec == 0);
149 debug_assert!(src2.plane_cfg.ydec == 0);
150
151 let mut sum = Distortion::zero();
152 for y in (0..h).step_by(8) {
153 for x in (0..w).step_by(8) {
154 let kernel_h = (h - y).min(8);
155 let kernel_w = (w - x).min(8);
156 let area = Area::StartingAt { x: x as isize, y: y as isize };
157
158 let value = RawDistortion(cdef_dist_kernel(
159 &src1.subregion(area),
160 &src2.subregion(area),
161 kernel_w,
162 kernel_h,
163 bit_depth,
164 cpu,
165 ) as u64);
166
167 sum += value * compute_bias(area, BlockSize::BLOCK_8X8);
170 }
171 }
172 sum
173}
174
175pub fn sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>(
178 src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
179 compute_bias: F, bit_depth: usize, cpu: CpuFeatureLevel,
180) -> Distortion {
181 const CHUNK_SIZE: usize = IMPORTANCE_BLOCK_SIZE >> 1;
184
185 let imp_block_w = CHUNK_SIZE << src1.plane_cfg.xdec;
188 let imp_block_h = CHUNK_SIZE << src1.plane_cfg.ydec;
189
190 let imp_bsize = BlockSize::from_width_and_height(imp_block_w, imp_block_h);
191
192 let n_imp_blocks_w = w.div_ceil(CHUNK_SIZE);
193 let n_imp_blocks_h = h.div_ceil(CHUNK_SIZE);
194
195 let mut buf_storage = Aligned::new(
203 [MaybeUninit::<u32>::uninit(); 128 / CHUNK_SIZE * 128 / CHUNK_SIZE],
204 );
205 let buf_stride = n_imp_blocks_w.next_power_of_two();
206 let buf = init_slice_repeat_mut(
207 &mut buf_storage.data[..buf_stride * n_imp_blocks_h],
208 0,
209 );
210
211 for block_y in 0..n_imp_blocks_h {
212 for block_x in 0..n_imp_blocks_w {
213 let block = Area::StartingAt {
214 x: (block_x * CHUNK_SIZE) as isize,
215 y: (block_y * CHUNK_SIZE) as isize,
216 };
217 buf[block_y * buf_stride + block_x] = compute_bias(block, imp_bsize).0;
218 }
219 }
220
221 Distortion(get_weighted_sse(
222 src1, src2, buf, buf_stride, w, h, bit_depth, cpu,
223 ))
224}
225
226#[allow(clippy::implicit_saturating_sub)]
228pub const fn clip_visible_bsize(
229 frame_w: usize, frame_h: usize, bsize: BlockSize, x: usize, y: usize,
230) -> (usize, usize) {
231 let blk_w = bsize.width();
232 let blk_h = bsize.height();
233
234 let visible_w: usize = if x + blk_w <= frame_w {
235 blk_w
236 } else if x >= frame_w {
237 0
238 } else {
239 frame_w - x
240 };
241
242 let visible_h: usize = if y + blk_h <= frame_h {
243 blk_h
244 } else if y >= frame_h {
245 0
246 } else {
247 frame_h - y
248 };
249
250 (visible_w, visible_h)
251}
252
253fn compute_distortion<T: Pixel>(
255 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
256 is_chroma_block: bool, tile_bo: TileBlockOffset, luma_only: bool,
257) -> ScaledDistortion {
258 let area = Area::BlockStartingAt { bo: tile_bo.0 };
259 let input_region = ts.input_tile.planes[0].subregion(area);
260 let rec_region = ts.rec.planes[0].subregion(area);
261
262 let frame_bo = ts.to_frame_block_offset(tile_bo);
264 let (visible_w, visible_h) = clip_visible_bsize(
265 fi.width,
266 fi.height,
267 bsize,
268 frame_bo.0.x << MI_SIZE_LOG2,
269 frame_bo.0.y << MI_SIZE_LOG2,
270 );
271
272 if visible_w == 0 || visible_h == 0 {
273 return ScaledDistortion::zero();
274 }
275
276 let mut distortion = match fi.config.tune {
277 Tune::Psychovisual => cdef_dist_wxh(
278 &input_region,
279 &rec_region,
280 visible_w,
281 visible_h,
282 fi.sequence.bit_depth,
283 |bias_area, bsize| {
284 distortion_scale(
285 fi,
286 input_region.subregion(bias_area).frame_block_offset(),
287 bsize,
288 )
289 },
290 fi.cpu_feature_level,
291 ),
292 Tune::Psnr => sse_wxh(
293 &input_region,
294 &rec_region,
295 visible_w,
296 visible_h,
297 |bias_area, bsize| {
298 distortion_scale(
299 fi,
300 input_region.subregion(bias_area).frame_block_offset(),
301 bsize,
302 )
303 },
304 fi.sequence.bit_depth,
305 fi.cpu_feature_level,
306 ),
307 } * fi.dist_scale[0];
308
309 if is_chroma_block
310 && !luma_only
311 && fi.sequence.chroma_sampling != ChromaSampling::Cs400
312 {
313 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
314 let chroma_w = if bsize.width() >= 8 || xdec == 0 {
315 (visible_w + xdec) >> xdec
316 } else {
317 (4 + visible_w + xdec) >> xdec
318 };
319 let chroma_h = if bsize.height() >= 8 || ydec == 0 {
320 (visible_h + ydec) >> ydec
321 } else {
322 (4 + visible_h + ydec) >> ydec
323 };
324
325 for p in 1..3 {
326 let input_region = ts.input_tile.planes[p].subregion(area);
327 let rec_region = ts.rec.planes[p].subregion(area);
328 distortion += sse_wxh(
329 &input_region,
330 &rec_region,
331 chroma_w,
332 chroma_h,
333 |bias_area, bsize| {
334 distortion_scale(
335 fi,
336 input_region.subregion(bias_area).frame_block_offset(),
337 bsize,
338 )
339 },
340 fi.sequence.bit_depth,
341 fi.cpu_feature_level,
342 ) * fi.dist_scale[p];
343 }
344 }
345 distortion
346}
347
348fn compute_tx_distortion<T: Pixel>(
350 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
351 is_chroma_block: bool, tile_bo: TileBlockOffset, tx_dist: ScaledDistortion,
352 skip: bool, luma_only: bool,
353) -> ScaledDistortion {
354 assert!(fi.config.tune == Tune::Psnr);
355 let area = Area::BlockStartingAt { bo: tile_bo.0 };
356 let input_region = ts.input_tile.planes[0].subregion(area);
357 let rec_region = ts.rec.planes[0].subregion(area);
358
359 let (visible_w, visible_h) = if !skip {
360 (bsize.width(), bsize.height())
361 } else {
362 let frame_bo = ts.to_frame_block_offset(tile_bo);
363 clip_visible_bsize(
364 fi.width,
365 fi.height,
366 bsize,
367 frame_bo.0.x << MI_SIZE_LOG2,
368 frame_bo.0.y << MI_SIZE_LOG2,
369 )
370 };
371
372 if visible_w == 0 || visible_h == 0 {
373 return ScaledDistortion::zero();
374 }
375
376 let mut distortion = if skip {
377 sse_wxh(
378 &input_region,
379 &rec_region,
380 visible_w,
381 visible_h,
382 |bias_area, bsize| {
383 distortion_scale(
384 fi,
385 input_region.subregion(bias_area).frame_block_offset(),
386 bsize,
387 )
388 },
389 fi.sequence.bit_depth,
390 fi.cpu_feature_level,
391 ) * fi.dist_scale[0]
392 } else {
393 tx_dist
394 };
395
396 if is_chroma_block
397 && !luma_only
398 && skip
399 && fi.sequence.chroma_sampling != ChromaSampling::Cs400
400 {
401 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
402 let chroma_w = if bsize.width() >= 8 || xdec == 0 {
403 (visible_w + xdec) >> xdec
404 } else {
405 (4 + visible_w + xdec) >> xdec
406 };
407 let chroma_h = if bsize.height() >= 8 || ydec == 0 {
408 (visible_h + ydec) >> ydec
409 } else {
410 (4 + visible_h + ydec) >> ydec
411 };
412
413 for p in 1..3 {
414 let input_region = ts.input_tile.planes[p].subregion(area);
415 let rec_region = ts.rec.planes[p].subregion(area);
416 distortion += sse_wxh(
417 &input_region,
418 &rec_region,
419 chroma_w,
420 chroma_h,
421 |bias_area, bsize| {
422 distortion_scale(
423 fi,
424 input_region.subregion(bias_area).frame_block_offset(),
425 bsize,
426 )
427 },
428 fi.sequence.bit_depth,
429 fi.cpu_feature_level,
430 ) * fi.dist_scale[p];
431 }
432 }
433 distortion
434}
435
436pub fn distortion_scale<T: Pixel>(
444 fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize,
445) -> DistortionScale {
446 if !fi.config.temporal_rdo() {
447 return DistortionScale::default();
448 }
449 assert!(bsize <= BlockSize::BLOCK_8X8);
453
454 let x = frame_bo.0.x >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
455 let y = frame_bo.0.y >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
456
457 let coded_data = fi.coded_frame_data.as_ref().unwrap();
458 coded_data.distortion_scales[y * coded_data.w_in_imp_b + x]
459}
460
461pub fn spatiotemporal_scale<T: Pixel>(
465 fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize,
466) -> DistortionScale {
467 if !fi.config.temporal_rdo() && fi.config.tune != Tune::Psychovisual {
468 return DistortionScale::default();
469 }
470
471 let coded_data = fi.coded_frame_data.as_ref().unwrap();
472
473 let x0 = frame_bo.0.x >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
474 let y0 = frame_bo.0.y >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
475 let x1 = (x0 + bsize.width_imp_b()).min(coded_data.w_in_imp_b);
476 let y1 = (y0 + bsize.height_imp_b()).min(coded_data.h_in_imp_b);
477 let den = (((x1 - x0) * (y1 - y0)) as u64) << DistortionScale::SHIFT;
478
479 #[inline(always)]
482 fn take_slice<T>(slice: &[T], n: usize) -> &[T] {
483 slice.get(..n).unwrap_or(slice)
484 }
485
486 let mut sum = 0;
487 for y in y0..y1 {
488 sum += take_slice(
489 &coded_data.distortion_scales[y * coded_data.w_in_imp_b..][x0..x1],
490 MAX_SB_IN_IMP_B,
491 )
492 .iter()
493 .zip(
494 take_slice(
495 &coded_data.activity_scales[y * coded_data.w_in_imp_b..][x0..x1],
496 MAX_SB_IN_IMP_B,
497 )
498 .iter(),
499 )
500 .map(|(d, a)| d.0 as u64 * a.0 as u64)
501 .sum::<u64>();
502 }
503 DistortionScale(((sum + (den >> 1)) / den) as u32)
504}
505
506pub fn distortion_scale_for(
507 propagate_cost: f64, intra_cost: f64,
508) -> DistortionScale {
509 if intra_cost == 0. {
547 return DistortionScale::default(); }
549
550 let strength = 1.0; let frac = (intra_cost + propagate_cost) / intra_cost;
552 frac.powf(strength / 3.0).into()
553}
554
/// Fixed-point scale factor applied to distortions.
///
/// The wrapped integer carries `DistortionScale::SHIFT` fractional bits, so
/// `1 << SHIFT` represents 1.0 (the `Default` value).
#[repr(transparent)]
#[derive(Copy, Clone)]
pub struct DistortionScale(pub u32);
559
/// A distortion value that has not been scaled yet; multiplying it by a
/// `DistortionScale` yields a `Distortion`.
#[repr(transparent)]
pub struct RawDistortion(u64);
562
/// A distortion value; multiplying it by a `DistortionScale` yields a
/// `ScaledDistortion`.
#[repr(transparent)]
pub struct Distortion(pub u64);
565
/// The result of multiplying a `Distortion` by a `DistortionScale`; this is
/// the distortion type consumed by `compute_rd_cost`.
#[repr(transparent)]
pub struct ScaledDistortion(u64);
568
569impl DistortionScale {
570 const SHIFT: u32 = 14;
572 const BITS: u32 = 28;
575 const MAX: u64 = (1 << Self::BITS) - 1;
577
578 #[inline]
579 pub const fn new(num: u64, den: u64) -> Self {
580 let raw = (num << Self::SHIFT).saturating_add(den / 2) / den;
581 let mask = (raw <= Self::MAX) as u64;
582 Self((mask * raw + (1 - mask) * Self::MAX) as u32)
583 }
584
585 pub fn inv_mean(slice: &[Self]) -> Self {
586 use crate::util::{bexp64, blog32_q11};
587 let sum = slice.iter().map(|&s| blog32_q11(s.0) as i64).sum::<i64>();
588 let log_inv_mean_q11 =
589 (Self::SHIFT << 11) as i64 - sum / slice.len() as i64;
590 Self(
591 bexp64((log_inv_mean_q11 + (Self::SHIFT << 11) as i64) << (57 - 11))
592 .clamp(1, (1 << Self::BITS) - 1) as u32,
593 )
594 }
595
596 #[inline]
598 pub const fn blog16(self) -> i16 {
599 use crate::util::blog32_q11;
600 (blog32_q11(self.0) - ((Self::SHIFT as i32) << 11)) as i16
601 }
602
603 #[inline]
605 pub const fn blog64(self) -> i64 {
606 use crate::util::{blog64, q57};
607 blog64(self.0 as i64) - q57(Self::SHIFT as i32)
608 }
609
610 #[inline]
613 pub const fn mul_u64(self, dist: u64) -> u64 {
614 (self.0 as u64 * dist + (1 << Self::SHIFT >> 1)) >> Self::SHIFT
615 }
616}
617
618impl std::ops::Mul for DistortionScale {
619 type Output = Self;
620
621 #[inline]
623 fn mul(self, rhs: Self) -> Self {
624 Self(
625 (((self.0 as u64 * rhs.0 as u64) + (1 << (Self::SHIFT - 1)))
626 >> Self::SHIFT)
627 .clamp(1, (1 << Self::BITS) - 1) as u32,
628 )
629 }
630}
631
632impl std::ops::MulAssign for DistortionScale {
633 fn mul_assign(&mut self, rhs: Self) {
634 *self = *self * rhs;
635 }
636}
637
638impl Default for DistortionScale {
640 #[inline]
641 fn default() -> Self {
642 Self(1 << Self::SHIFT)
643 }
644}
645
646impl fmt::Debug for DistortionScale {
647 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
648 write!(f, "{}", f64::from(*self))
649 }
650}
651
652impl From<f64> for DistortionScale {
653 #[inline]
654 fn from(scale: f64) -> Self {
655 let den = 1 << (Self::SHIFT + 1);
656 Self::new((scale * den as f64) as u64, den)
657 }
658}
659
660impl From<DistortionScale> for f64 {
661 #[inline]
662 fn from(scale: DistortionScale) -> Self {
663 scale.0 as f64 / (1 << DistortionScale::SHIFT) as f64
664 }
665}
666
667impl RawDistortion {
668 #[inline]
669 pub const fn new(dist: u64) -> Self {
670 Self(dist)
671 }
672}
673
674impl std::ops::Mul<DistortionScale> for RawDistortion {
675 type Output = Distortion;
676 #[inline]
677 fn mul(self, rhs: DistortionScale) -> Distortion {
678 Distortion(rhs.mul_u64(self.0))
679 }
680}
681
682impl Distortion {
683 #[inline]
684 pub const fn zero() -> Self {
685 Self(0)
686 }
687}
688
689impl std::ops::Mul<DistortionScale> for Distortion {
690 type Output = ScaledDistortion;
691 #[inline]
692 fn mul(self, rhs: DistortionScale) -> ScaledDistortion {
693 ScaledDistortion(rhs.mul_u64(self.0))
694 }
695}
696
697impl std::ops::AddAssign for Distortion {
698 #[inline]
699 fn add_assign(&mut self, other: Self) {
700 self.0 += other.0;
701 }
702}
703
704impl ScaledDistortion {
705 #[inline]
706 pub const fn zero() -> Self {
707 Self(0)
708 }
709}
710
711impl std::ops::AddAssign for ScaledDistortion {
712 #[inline]
713 fn add_assign(&mut self, other: Self) {
714 self.0 += other.0;
715 }
716}
717
718pub fn compute_rd_cost<T: Pixel>(
719 fi: &FrameInvariants<T>, rate: u32, distortion: ScaledDistortion,
720) -> f64 {
721 let rate_in_bits = (rate as f64) / ((1 << OD_BITRES) as f64);
722 fi.lambda.mul_add(rate_in_bits, distortion.0 as f64)
723}
724
725pub fn rdo_tx_size_type<T: Pixel>(
726 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
727 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
728 luma_mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
729 skip: bool,
730) -> (TxSize, TxType) {
731 let is_inter = !luma_mode.is_intra();
732 let mut tx_size = max_txsize_rect_lookup[bsize as usize];
733
734 if fi.enable_inter_txfm_split && is_inter && !skip {
735 tx_size = sub_tx_size_map[tx_size as usize]; }
737
738 let mut best_tx_type = TxType::DCT_DCT;
739 let mut best_tx_size = tx_size;
740 let mut best_rd = f64::MAX;
741
742 let do_rdo_tx_size = fi.tx_mode_select
743 && fi.config.speed_settings.transform.rdo_tx_decision
744 && !is_inter;
745 let rdo_tx_depth = if do_rdo_tx_size { 2 } else { 0 };
746 let mut cw_checkpoint: Option<ContextWriterCheckpoint> = None;
747
748 for _ in 0..=rdo_tx_depth {
749 let tx_set = get_tx_set(tx_size, is_inter, fi.use_reduced_tx_set);
750
751 let do_rdo_tx_type = tx_set > TxSet::TX_SET_DCTONLY
752 && fi.config.speed_settings.transform.rdo_tx_decision
753 && !is_inter
754 && !skip;
755
756 if !do_rdo_tx_size && !do_rdo_tx_type {
757 return (best_tx_size, best_tx_type);
758 };
759
760 let tx_types =
761 if do_rdo_tx_type { RAV1E_TX_TYPES } else { &[TxType::DCT_DCT] };
762
763 let (tx_type, rd_cost) = rdo_tx_type_decision(
765 fi,
766 ts,
767 cw,
768 &mut cw_checkpoint,
769 luma_mode,
770 ref_frames,
771 mvs,
772 bsize,
773 tile_bo,
774 tx_size,
775 tx_set,
776 tx_types,
777 best_rd,
778 );
779
780 if rd_cost < best_rd {
781 best_tx_size = tx_size;
782 best_tx_type = tx_type;
783 best_rd = rd_cost;
784 }
785
786 debug_assert!(tx_size.width_log2() <= bsize.width_log2());
787 debug_assert!(tx_size.height_log2() <= bsize.height_log2());
788 debug_assert!(
789 tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT
790 );
791
792 let next_tx_size = sub_tx_size_map[tx_size as usize];
793
794 if next_tx_size == tx_size {
795 break;
796 } else {
797 tx_size = next_tx_size;
798 };
799 }
800
801 (best_tx_size, best_tx_type)
802}
803
804#[inline]
805const fn dmv_in_range(mv: MotionVector, ref_mv: MotionVector) -> bool {
806 let diff_row = mv.row as i32 - ref_mv.row as i32;
807 let diff_col = mv.col as i32 - ref_mv.col as i32;
808 diff_row >= MV_LOW
809 && diff_row <= MV_UPP
810 && diff_col >= MV_LOW
811 && diff_col <= MV_UPP
812}
813
/// Evaluates one luma prediction mode together with every chroma mode in
/// `mode_set_chroma` and stores the cheapest combination in `best`.
///
/// For inter modes that carry a new motion vector, nothing is evaluated when
/// the difference between that vector and the first `mv_stack` candidate (or
/// the zero vector when the stack is empty) is out of range.
///
/// Each candidate is encoded into a `WriterCounter` to measure its rate, its
/// distortion is computed, and `cw` is rolled back to `cw_checkpoint`
/// afterwards. `best` is only overwritten when a candidate's RD cost is
/// strictly lower than `best.rd_cost`.
///
/// Inter modes are first tried with `skip` set; the non-skip evaluation is
/// omitted when that pass ended with a zero-distortion best candidate. Intra
/// modes are only evaluated without `skip`.
#[inline]
#[profiling::function]
fn luma_chroma_mode_rdo<T: Pixel>(
  luma_mode: PredictionMode, fi: &FrameInvariants<T>, bsize: BlockSize,
  tile_bo: TileBlockOffset, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, rdo_type: RDOType,
  cw_checkpoint: &ContextWriterCheckpoint, best: &mut PartitionParameters,
  mvs: [MotionVector; 2], ref_frames: [RefType; 2],
  mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool,
  mode_context: usize, mv_stack: &ArrayVec<CandidateMV, 9>,
  angle_delta: AngleDelta,
) {
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;

  let is_chroma_block =
    has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);

  if !luma_mode_is_intra {
    // Reference vectors the new motion vectors are compared against.
    let ref_mvs = if mv_stack.is_empty() {
      [MotionVector::default(); 2]
    } else {
      [mv_stack[0].this_mv, mv_stack[0].comp_mv]
    };

    // Modes whose first vector is new: reject out-of-range differences.
    if (luma_mode == PredictionMode::NEWMV
      || luma_mode == PredictionMode::NEW_NEWMV
      || luma_mode == PredictionMode::NEW_NEARESTMV)
      && !dmv_in_range(mvs[0], ref_mvs[0])
    {
      return;
    }

    // Modes whose second vector is new: same check on the second vector.
    if (luma_mode == PredictionMode::NEW_NEWMV
      || luma_mode == PredictionMode::NEAREST_NEWMV)
      && !dmv_in_range(mvs[1], ref_mvs[1])
    {
      return;
    }
  }

  // Evaluates every (segment index, chroma mode) pair for the given `skip`
  // value. Returns whether the last candidate accepted into `best` by this
  // call had zero distortion (false if none was accepted).
  let mut chroma_rdo = |skip: bool| -> bool {
    use crate::segmentation::select_segment;

    let mut zero_distortion = false;

    for sidx in select_segment(fi, ts, tile_bo, bsize, skip) {
      cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, sidx);

      // The transform choice is made once per segment index and shared by
      // all chroma modes.
      let (tx_size, tx_type) = rdo_tx_size_type(
        fi, ts, cw, bsize, tile_bo, luma_mode, ref_frames, mvs, skip,
      );
      for &chroma_mode in mode_set_chroma.iter() {
        // Rate is measured as the growth of a counting writer.
        let wr = &mut WriterCounter::new();
        let tell = wr.tell_frac();

        if bsize >= BlockSize::BLOCK_8X8 && bsize.is_sqr() {
          cw.write_partition(
            wr,
            tile_bo,
            PartitionType::PARTITION_NONE,
            bsize,
          );
        }

        // Set for intra blocks whose transform does not cover the whole
        // block; it also forces pixel-domain distortion below.
        let need_recon_pixel =
          luma_mode_is_intra && tx_size.block_size() != bsize;

        encode_block_pre_cdef(&fi.sequence, ts, cw, wr, bsize, tile_bo, skip);
        let (has_coeff, tx_dist) = encode_block_post_cdef(
          fi,
          ts,
          cw,
          wr,
          luma_mode,
          chroma_mode,
          angle_delta,
          ref_frames,
          mvs,
          bsize,
          tile_bo,
          skip,
          CFLParams::default(),
          tx_size,
          tx_type,
          mode_context,
          mv_stack,
          rdo_type,
          need_recon_pixel,
          None,
        );

        let rate = wr.tell_frac() - tell;
        let distortion = if fi.use_tx_domain_distortion && !need_recon_pixel {
          compute_tx_distortion(
            fi,
            ts,
            bsize,
            is_chroma_block,
            tile_bo,
            tx_dist,
            skip,
            false,
          )
        } else {
          compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false)
        };
        let is_zero_dist = distortion.0 == 0;
        let rd = compute_rd_cost(fi, rate, distortion);
        if rd < best.rd_cost {
          best.rd_cost = rd;
          best.pred_mode_luma = luma_mode;
          best.pred_mode_chroma = chroma_mode;
          best.angle_delta = angle_delta;
          best.ref_frames = ref_frames;
          best.mvs = mvs;
          best.skip = skip;
          best.has_coeff = has_coeff;
          best.tx_size = tx_size;
          best.tx_type = tx_type;
          best.sidx = sidx;
          zero_distortion = is_zero_dist;
        }

        // Undo the context changes made while evaluating this candidate.
        cw.rollback(cw_checkpoint);
      }
    }

    zero_distortion
  };

  // The skip pass is only run for inter modes.
  let zero_distortion =
    if !luma_mode_is_intra { chroma_rdo(true) } else { false };
  // A zero-distortion skip candidate makes the non-skip pass unnecessary.
  if !zero_distortion {
    chroma_rdo(false);
  }
}
955
/// Chooses the prediction mode and associated parameters for the block of
/// size `bsize` at `tile_bo`.
///
/// The decision proceeds in three steps:
/// 1. on frames that allow inter prediction, inter candidates are evaluated;
/// 2. unless the best candidate so far is a skip block, intra candidates are
///    evaluated as well;
/// 3. if the winner is an intra mode on a chroma block for which CfL is
///    allowed, `UV_CFL_PRED` is tried as the chroma mode.
///
/// Before returning, the winning mode, reference frames and motion vectors
/// are written into the block context of `cw`.
///
/// # Panics
///
/// Panics if the frame type allows inter prediction but is `KEY`, or if the
/// final RD cost is negative.
#[profiling::function]
pub fn rdo_mode_decision<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
  inter_cfg: &InterConfig,
) -> PartitionParameters {
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
  let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);

  // `use_tx_domain_rate` takes precedence over `use_tx_domain_distortion`.
  let rdo_type = if fi.use_tx_domain_rate {
    RDOType::TxDistEstRate
  } else if fi.use_tx_domain_distortion {
    RDOType::TxDistRealRate
  } else {
    RDOType::PixelDistRealRate
  };

  let mut best = if fi.frame_type.has_inter() {
    assert!(fi.frame_type != FrameType::KEY);

    inter_frame_rdo_mode_decision(
      fi,
      ts,
      cw,
      bsize,
      tile_bo,
      inter_cfg,
      &cw_checkpoint,
      rdo_type,
    )
  } else {
    PartitionParameters::default()
  };

  let is_chroma_block =
    has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);

  // Intra candidates are not tried when the inter search chose a skip block.
  if !best.skip {
    best = intra_frame_rdo_mode_decision(
      fi,
      ts,
      cw,
      bsize,
      tile_bo,
      &cw_checkpoint,
      rdo_type,
      best,
      is_chroma_block,
    );
  }

  if best.pred_mode_luma.is_intra() && is_chroma_block && bsize.cfl_allowed() {
    cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, best.sidx);

    let chroma_mode = PredictionMode::UV_CFL_PRED;
    // Local checkpoint, shadowing the one taken at function entry.
    let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
    let mut wr = WriterCounter::new();
    let angle_delta = AngleDelta { y: best.angle_delta.y, uv: 0 };

    // NOTE(review): presumably this writes the luma transform blocks so that
    // `rdo_cfl_alpha` below works on reconstructed luma — confirm against
    // `write_tx_blocks`.
    write_tx_blocks(
      fi,
      ts,
      cw,
      &mut wr,
      best.pred_mode_luma,
      best.pred_mode_luma,
      angle_delta,
      tile_bo,
      bsize,
      best.tx_size,
      best.tx_type,
      false,
      CFLParams::default(),
      true,
      rdo_type,
      true,
    );
    cw.rollback(&cw_checkpoint);
    if fi.sequence.chroma_sampling != ChromaSampling::Cs400 {
      if let Some(cfl) = rdo_cfl_alpha(ts, tile_bo, bsize, best.tx_size, fi) {
        let mut wr = WriterCounter::new();
        let tell = wr.tell_frac();

        encode_block_pre_cdef(
          &fi.sequence,
          ts,
          cw,
          &mut wr,
          bsize,
          tile_bo,
          best.skip,
        );
        let (has_coeff, _) = encode_block_post_cdef(
          fi,
          ts,
          cw,
          &mut wr,
          best.pred_mode_luma,
          chroma_mode,
          angle_delta,
          best.ref_frames,
          best.mvs,
          bsize,
          tile_bo,
          best.skip,
          cfl,
          best.tx_size,
          best.tx_type,
          0,
          &[],
          rdo_type,
          true, None,
        );

        let rate = wr.tell_frac() - tell;

        // The CfL candidate is always measured in the pixel domain.
        let distortion =
          compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false);
        let rd = compute_rd_cost(fi, rate, distortion);
        if rd < best.rd_cost {
          best.rd_cost = rd;
          best.pred_mode_chroma = chroma_mode;
          best.angle_delta = angle_delta;
          best.has_coeff = has_coeff;
          best.pred_cfl_params = cfl;
        }

        cw.rollback(&cw_checkpoint);
      }
    }
  }

  // Record the decision in the block context.
  cw.bc.blocks.set_mode(tile_bo, bsize, best.pred_mode_luma);
  cw.bc.blocks.set_ref_frames(tile_bo, bsize, best.ref_frames);
  cw.bc.blocks.set_motion_vectors(tile_bo, bsize, best.mvs);

  assert!(best.rd_cost >= 0_f64);

  // Same as `best`, but with the offset and size of the evaluated block.
  PartitionParameters {
    bo: tile_bo,
    bsize,
    pred_mode_luma: best.pred_mode_luma,
    pred_mode_chroma: best.pred_mode_chroma,
    pred_cfl_params: best.pred_cfl_params,
    angle_delta: best.angle_delta,
    ref_frames: best.ref_frames,
    mvs: best.mvs,
    rd_cost: best.rd_cost,
    skip: best.skip,
    has_coeff: best.has_coeff,
    tx_size: best.tx_size,
    tx_type: best.tx_type,
    sidx: best.sidx,
  }
}
1119
/// Evaluates the inter prediction candidates for a block and returns the
/// cheapest one.
///
/// The function
/// 1. collects the allowed reference frames (skipping `LAST3_FRAME` and
///    frames that map to an already collected reference slot),
/// 2. runs motion estimation for each of them and builds the list of
///    single-reference candidate modes,
/// 3. optionally adds compound candidates that pair the first forward with
///    the first backward reference,
/// 4. derives the motion vectors of every candidate and, when not all
///    candidates will be evaluated, ranks them by SATD of the luma
///    prediction, and
/// 5. runs `luma_chroma_mode_rdo` on the selected candidates.
///
/// The candidate containers are fixed-capacity `ArrayVec`s; pushing beyond
/// their capacity panics.
#[profiling::function]
fn inter_frame_rdo_mode_decision<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
  inter_cfg: &InterConfig, cw_checkpoint: &ContextWriterCheckpoint,
  rdo_type: RDOType,
) -> PartitionParameters {
  let mut best = PartitionParameters::default();

  // Reference frame pairs to search; the second entry is `NONE_FRAME` for
  // single-reference prediction.
  let mut ref_frames_set = ArrayVec::<_, 7>::new();
  // Reference slots already covered by `ref_frames_set`.
  let mut ref_slot_set = ArrayVec::<_, 7>::new();
  // Motion estimation result for each entry of `ref_frames_set`.
  let mut mvs_from_me = ArrayVec::<_, 3>::new();
  // Indices into `ref_frames_set` of the first forward/backward reference.
  let mut fwdref = None;
  let mut bwdref = None;

  for i in inter_cfg.allowed_ref_frames().iter().copied() {
    if i == LAST3_FRAME {
      continue;
    }

    // Only keep one reference frame per distinct slot.
    if !ref_slot_set.contains(&fi.ref_frames[i.to_index()]) {
      if fwdref.is_none() && i.is_fwd_ref() {
        fwdref = Some(ref_frames_set.len());
      }
      if bwdref.is_none() && i.is_bwd_ref() {
        bwdref = Some(ref_frames_set.len());
      }
      ref_frames_set.push([i, NONE_FRAME]);
      let slot_idx = fi.ref_frames[i.to_index()];
      ref_slot_set.push(slot_idx);
    }
  }
  assert!(!ref_frames_set.is_empty());

  // Candidate modes, each paired with its index into `ref_frames_set`.
  let mut inter_mode_set = ArrayVec::<(PredictionMode, usize), 20>::new();
  // Motion vectors and SATD of each candidate, in `inter_mode_set` order.
  let mut mvs_set = ArrayVec::<[MotionVector; 2], 20>::new();
  let mut satds = ArrayVec::<u32, 20>::new();
  // MV candidate stack and mode context per `ref_frames_set` entry.
  let mut mv_stacks = ArrayVec::<_, 20>::new();
  let mut mode_contexts = ArrayVec::<_, 7>::new();

  for (i, &ref_frames) in ref_frames_set.iter().enumerate() {
    let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
    mode_contexts.push(cw.find_mvrefs(
      tile_bo,
      ref_frames,
      &mut mv_stack,
      bsize,
      fi,
      false,
    ));

    // Predicted vectors handed to motion estimation: the first two stack
    // entries, zero where the stack is too short.
    let mut pmv = [MotionVector::default(); 2];
    if !mv_stack.is_empty() {
      pmv[0] = mv_stack[0].this_mv;
    }
    if mv_stack.len() > 1 {
      pmv[1] = mv_stack[1].this_mv;
    }

    let res = estimate_motion(
      fi,
      ts,
      bsize.width(),
      bsize.height(),
      tile_bo,
      ref_frames[0],
      Some(pmv),
      MVSamplingMode::CORNER { right: true, bottom: true },
      false,
      0,
      None,
    )
    .unwrap_or_else(MotionSearchResult::empty);
    let b_me = res.mv;

    mvs_from_me.push([b_me, MotionVector::default()]);

    for &x in RAV1E_INTER_MODES_MINIMAL {
      inter_mode_set.push((x, i));
    }
    if !mv_stack.is_empty() {
      inter_mode_set.push((PredictionMode::NEAR0MV, i));
    }
    if mv_stack.len() >= 2 {
      inter_mode_set.push((PredictionMode::GLOBALMV, i));
    }
    let include_near_mvs = fi.config.speed_settings.motion.include_near_mvs;
    if include_near_mvs {
      if mv_stack.len() >= 3 {
        inter_mode_set.push((PredictionMode::NEAR1MV, i));
      }
      if mv_stack.len() >= 4 {
        inter_mode_set.push((PredictionMode::NEAR2MV, i));
      }
    }
    // NEWMV is only added when the estimated vector is non-zero and differs
    // from the stack candidates that are considered above.
    let same_row_col = |x: &CandidateMV| {
      x.this_mv.row == mvs_from_me[i][0].row
        && x.this_mv.col == mvs_from_me[i][0].col
    };
    if !mv_stack
      .iter()
      .take(if include_near_mvs { 4 } else { 2 })
      .any(same_row_col)
      && (mvs_from_me[i][0].row != 0 || mvs_from_me[i][0].col != 0)
    {
      inter_mode_set.push((PredictionMode::NEWMV, i));
    }

    mv_stacks.push(mv_stack);
  }

  let sz = bsize.width_mi().min(bsize.height_mi());

  // Compound candidates require a non-single reference mode and a block
  // whose smaller side is at least 2 mode-info units.
  if fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 {
    if let Some(r0) = fwdref {
      if let Some(r1) = bwdref {
        // The compound pair is appended after the single references, so its
        // index is `ref_frames_set.len() - 1`.
        let ref_frames = [ref_frames_set[r0][0], ref_frames_set[r1][0]];
        ref_frames_set.push(ref_frames);
        let mv0 = mvs_from_me[r0][0];
        let mv1 = mvs_from_me[r1][0];
        mvs_from_me.push([mv0, mv1]);
        let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
        mode_contexts.push(cw.find_mvrefs(
          tile_bo,
          ref_frames,
          &mut mv_stack,
          bsize,
          fi,
          true,
        ));
        for &x in RAV1E_INTER_COMPOUND_MODES {
          if fi.config.speed_settings.motion.include_near_mvs
            || !x.has_nearmv()
          {
            let mv_stack_idx = ref_frames_set.len() - 1;
            // Skip near modes that refer past the end of the stack.
            if !(x.has_nearmv() && x.ref_mv_idx() >= mv_stack.len()) {
              inter_mode_set.push((x, mv_stack_idx));
            }
          }
        }
        mv_stacks.push(mv_stack);
      }
    }
  }

  // Number of candidates that go through full RDO: all of them at the
  // `ComplexAll` setting or above, nine otherwise.
  let num_modes_rdo = if fi.config.speed_settings.prediction.prediction_modes
    >= PredictionModesSetting::ComplexAll
  {
    inter_mode_set.len()
  } else {
    9 };

  inter_mode_set.iter().for_each(|&(luma_mode, i)| {
    // Motion vectors implied by the mode.
    let mvs = match luma_mode {
      PredictionMode::NEWMV | PredictionMode::NEW_NEWMV => mvs_from_me[i],
      PredictionMode::NEARESTMV | PredictionMode::NEAREST_NEARESTMV => {
        if !mv_stacks[i].is_empty() {
          [mv_stacks[i][0].this_mv, mv_stacks[i][0].comp_mv]
        } else {
          [MotionVector::default(); 2]
        }
      }
      PredictionMode::NEAR0MV | PredictionMode::NEAR_NEAR0MV => {
        if mv_stacks[i].len() > 1 {
          [mv_stacks[i][1].this_mv, mv_stacks[i][1].comp_mv]
        } else {
          [MotionVector::default(); 2]
        }
      }
      PredictionMode::NEAR1MV
      | PredictionMode::NEAR2MV
      | PredictionMode::NEAR_NEAR1MV
      | PredictionMode::NEAR_NEAR2MV => [
        mv_stacks[i][luma_mode.ref_mv_idx()].this_mv,
        mv_stacks[i][luma_mode.ref_mv_idx()].comp_mv,
      ],
      PredictionMode::NEAREST_NEWMV => {
        [mv_stacks[i][0].this_mv, mvs_from_me[i][1]]
      }
      PredictionMode::NEW_NEARESTMV => {
        [mvs_from_me[i][0], mv_stacks[i][0].comp_mv]
      }
      PredictionMode::GLOBALMV | PredictionMode::GLOBAL_GLOBALMV => {
        [MotionVector::default(); 2]
      }
      _ => {
        unimplemented!();
      }
    };
    mvs_set.push(mvs);

    // SATD is only needed when the candidates have to be ranked.
    if num_modes_rdo != inter_mode_set.len() {
      let tile_rect = ts.tile_rect();
      let rec = &mut ts.rec.planes[0];
      let po = tile_bo.plane_offset(rec.plane_cfg);
      let mut rec_region =
        rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });

      // Predict luma into the reconstruction buffer and compare it with the
      // input.
      luma_mode.predict_inter(
        fi,
        tile_rect,
        0,
        po,
        &mut rec_region,
        bsize.width(),
        bsize.height(),
        ref_frames_set[i],
        mvs,
        &mut ts.inter_compound_buffers,
      );

      let plane_org = ts.input_tile.planes[0]
        .subregion(Area::BlockStartingAt { bo: tile_bo.0 });
      let plane_ref = rec_region.as_const();

      let satd = get_satd(
        &plane_org,
        &plane_ref,
        bsize.width(),
        bsize.height(),
        fi.sequence.bit_depth,
        fi.cpu_feature_level,
      );
      satds.push(satd);
    } else {
      satds.push(0);
    }
  });

  // Candidates with their motion vectors and SATD, lowest SATD first when a
  // ranking is required.
  let mut sorted =
    izip!(inter_mode_set, mvs_set, satds).collect::<ArrayVec<_, 20>>();
  if num_modes_rdo != sorted.len() {
    sorted.sort_by_key(|((_mode, _i), _mvs, satd)| *satd);
  }

  sorted.iter().take(num_modes_rdo).for_each(
    |&((luma_mode, i), mvs, _satd)| {
      // The single "chroma mode" passed for an inter candidate is the luma
      // mode itself.
      let mode_set_chroma = ArrayVec::from([luma_mode]);

      luma_chroma_mode_rdo(
        luma_mode,
        fi,
        bsize,
        tile_bo,
        ts,
        cw,
        rdo_type,
        cw_checkpoint,
        &mut best,
        mvs,
        ref_frames_set[i],
        &mode_set_chroma,
        false,
        mode_contexts[i],
        &mv_stacks[i],
        AngleDelta::default(),
      );
    },
  );

  best
}
1392
1393#[profiling::function]
1394fn intra_frame_rdo_mode_decision<T: Pixel>(
1395 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1396 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
1397 cw_checkpoint: &ContextWriterCheckpoint, rdo_type: RDOType,
1398 mut best: PartitionParameters, is_chroma_block: bool,
1399) -> PartitionParameters {
1400 let mut modes = ArrayVec::<_, INTRA_MODES>::new();
1401
1402 let num_modes_rdo = if (fi.frame_type == FrameType::KEY
1404 && fi.config.speed_settings.prediction.prediction_modes
1405 >= PredictionModesSetting::ComplexKeyframes)
1406 || (fi.frame_type.has_inter()
1407 && fi.config.speed_settings.prediction.prediction_modes
1408 >= PredictionModesSetting::ComplexAll)
1409 {
1410 7
1411 } else {
1412 3
1413 };
1414
1415 let intra_mode_set = RAV1E_INTRA_MODES;
1416
1417 {
1419 use crate::ec::cdf_to_pdf;
1420
1421 let probs_all = cdf_to_pdf(if fi.frame_type.has_inter() {
1422 cw.get_cdf_intra_mode(bsize)
1423 } else {
1424 cw.get_cdf_intra_mode_kf(tile_bo)
1425 });
1426
1427 modes.try_extend_from_slice(intra_mode_set).unwrap();
1428 modes.sort_by_key(|&a| !probs_all[a as usize]);
1429 }
1430
1431 {
1434 let satds = {
1435 let tx_size = bsize.tx_size();
1437 let mut edge_buf = Aligned::uninit_array();
1438 let edge_buf = {
1439 let rec = &ts.rec.planes[0].as_const();
1440 let po = tile_bo.plane_offset(rec.plane_cfg);
1441 get_intra_edges(
1443 &mut edge_buf,
1444 rec,
1445 tile_bo,
1446 0,
1447 0,
1448 bsize,
1449 po,
1450 tx_size,
1451 fi.sequence.bit_depth,
1452 None,
1453 fi.sequence.enable_intra_edge_filter,
1454 IntraParam::None,
1455 )
1456 };
1457
1458 let ief_params = if fi.sequence.enable_intra_edge_filter {
1459 let above_block_info = ts.above_block_info(tile_bo, 0, 0);
1460 let left_block_info = ts.left_block_info(tile_bo, 0, 0);
1461 Some(IntraEdgeFilterParameters::new(
1462 0,
1463 above_block_info,
1464 left_block_info,
1465 ))
1466 } else {
1467 None
1468 };
1469
1470 let mut satds_all = [0; INTRA_MODES];
1471 for &luma_mode in modes.iter().skip(num_modes_rdo / 2) {
1472 let tile_rect = ts.tile_rect();
1473 let rec = &mut ts.rec.planes[0];
1474 let mut rec_region =
1475 rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
1476 luma_mode.predict_intra(
1478 tile_rect,
1479 &mut rec_region,
1480 tx_size,
1481 fi.sequence.bit_depth,
1482 &[0i16; 2],
1483 IntraParam::None,
1484 if luma_mode.is_directional() { ief_params } else { None },
1485 &edge_buf,
1486 fi.cpu_feature_level,
1487 );
1488
1489 let plane_org = ts.input_tile.planes[0]
1490 .subregion(Area::BlockStartingAt { bo: tile_bo.0 });
1491 let plane_ref = rec_region.as_const();
1492
1493 satds_all[luma_mode as usize] = get_satd(
1494 &plane_org,
1495 &plane_ref,
1496 tx_size.width(),
1497 tx_size.height(),
1498 fi.sequence.bit_depth,
1499 fi.cpu_feature_level,
1500 );
1501 }
1502 satds_all
1503 };
1504
1505 modes[num_modes_rdo / 2..].sort_by_key(|&a| satds[a as usize]);
1506 }
1507
1508 debug_assert!(num_modes_rdo >= 1);
1509
1510 modes.iter().take(num_modes_rdo).for_each(|&luma_mode| {
1511 let mvs = [MotionVector::default(); 2];
1512 let ref_frames = [INTRA_FRAME, NONE_FRAME];
1513 let mut mode_set_chroma = ArrayVec::<_, 2>::new();
1514 mode_set_chroma.push(luma_mode);
1515 if is_chroma_block && luma_mode != PredictionMode::DC_PRED {
1516 mode_set_chroma.push(PredictionMode::DC_PRED);
1517 }
1518 luma_chroma_mode_rdo(
1519 luma_mode,
1520 fi,
1521 bsize,
1522 tile_bo,
1523 ts,
1524 cw,
1525 rdo_type,
1526 cw_checkpoint,
1527 &mut best,
1528 mvs,
1529 ref_frames,
1530 &mode_set_chroma,
1531 true,
1532 0,
1533 &ArrayVec::<CandidateMV, 9>::new(),
1534 AngleDelta::default(),
1535 );
1536 });
1537
1538 if fi.config.speed_settings.prediction.fine_directional_intra
1539 && bsize >= BlockSize::BLOCK_8X8
1540 {
1541 let luma_deltas = best.pred_mode_luma.angle_delta_count();
1543 let chroma_deltas = best.pred_mode_chroma.angle_delta_count();
1544
1545 let mvs = [MotionVector::default(); 2];
1546 let ref_frames = [INTRA_FRAME, NONE_FRAME];
1547 let mode_set_chroma = [best.pred_mode_chroma];
1548 let mv_stack = ArrayVec::<_, 9>::new();
1549 let mut best_angle_delta = best.angle_delta;
1550 let mut angle_delta_rdo = |y, uv| -> AngleDelta {
1551 if best.angle_delta.y != y || best.angle_delta.uv != uv {
1552 luma_chroma_mode_rdo(
1553 best.pred_mode_luma,
1554 fi,
1555 bsize,
1556 tile_bo,
1557 ts,
1558 cw,
1559 rdo_type,
1560 cw_checkpoint,
1561 &mut best,
1562 mvs,
1563 ref_frames,
1564 &mode_set_chroma,
1565 true,
1566 0,
1567 &mv_stack,
1568 AngleDelta { y, uv },
1569 );
1570 }
1571 best.angle_delta
1572 };
1573
1574 for i in 0..luma_deltas {
1575 let angle_delta_y =
1576 if luma_deltas == 1 { 0 } else { i - MAX_ANGLE_DELTA as i8 };
1577 best_angle_delta = angle_delta_rdo(angle_delta_y, best_angle_delta.uv);
1578 }
1579 for j in 0..chroma_deltas {
1580 let angle_delta_uv =
1581 if chroma_deltas == 1 { 0 } else { j - MAX_ANGLE_DELTA as i8 };
1582 best_angle_delta = angle_delta_rdo(best_angle_delta.y, angle_delta_uv);
1583 }
1584 }
1585
1586 best
1587}
1588
1589#[profiling::function]
1593pub fn rdo_cfl_alpha<T: Pixel>(
1594 ts: &mut TileStateMut<'_, T>, tile_bo: TileBlockOffset, bsize: BlockSize,
1595 luma_tx_size: TxSize, fi: &FrameInvariants<T>,
1596) -> Option<CFLParams> {
1597 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
1598 let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec);
1599 debug_assert!(
1600 bsize.subsampled_size(xdec, ydec).unwrap() == uv_tx_size.block_size()
1601 );
1602
1603 let frame_bo = ts.to_frame_block_offset(tile_bo);
1604 let (visible_tx_w, visible_tx_h) = clip_visible_bsize(
1605 (fi.width + xdec) >> xdec,
1606 (fi.height + ydec) >> ydec,
1607 uv_tx_size.block_size(),
1608 (frame_bo.0.x << MI_SIZE_LOG2) >> xdec,
1609 (frame_bo.0.y << MI_SIZE_LOG2) >> ydec,
1610 );
1611
1612 if visible_tx_w == 0 || visible_tx_h == 0 {
1613 return None;
1614 };
1615 let mut ac = Aligned::<[MaybeUninit<i16>; 32 * 32]>::uninit_array();
1616 let ac = luma_ac(&mut ac.data, ts, tile_bo, bsize, luma_tx_size, fi);
1617 let best_alpha: ArrayVec<i16, 2> = (1..3)
1618 .map(|p| {
1619 let &PlaneConfig { xdec, ydec, .. } = ts.rec.planes[p].plane_cfg;
1620 let tile_rect = ts.tile_rect().decimated(xdec, ydec);
1621 let rec = &mut ts.rec.planes[p];
1622 let input = &ts.input_tile.planes[p];
1623 let po = tile_bo.plane_offset(rec.plane_cfg);
1624 let mut edge_buf = Aligned::uninit_array();
1625 let edge_buf = get_intra_edges(
1626 &mut edge_buf,
1627 &rec.as_const(),
1628 tile_bo,
1629 0,
1630 0,
1631 bsize,
1632 po,
1633 uv_tx_size,
1634 fi.sequence.bit_depth,
1635 Some(PredictionMode::UV_CFL_PRED),
1636 fi.sequence.enable_intra_edge_filter,
1637 IntraParam::None,
1638 );
1639 let mut alpha_cost = |alpha: i16| -> u64 {
1640 let mut rec_region =
1641 rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
1642 PredictionMode::UV_CFL_PRED.predict_intra(
1643 tile_rect,
1644 &mut rec_region,
1645 uv_tx_size,
1646 fi.sequence.bit_depth,
1647 ac,
1648 IntraParam::Alpha(alpha),
1649 None,
1650 &edge_buf,
1651 fi.cpu_feature_level,
1652 );
1653 sse_wxh(
1654 &input.subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
1655 &rec_region.as_const(),
1656 visible_tx_w,
1657 visible_tx_h,
1658 |_, _| DistortionScale::default(), fi.sequence.bit_depth,
1660 fi.cpu_feature_level,
1661 )
1662 .0
1663 };
1664 let mut best = (alpha_cost(0), 0);
1665 let mut count = 2;
1666 for alpha in 1i16..=16i16 {
1667 let cost = (alpha_cost(alpha), alpha_cost(-alpha));
1668 if cost.0 < best.0 {
1669 best = (cost.0, alpha);
1670 count += 2;
1671 }
1672 if cost.1 < best.0 {
1673 best = (cost.1, -alpha);
1674 count += 2;
1675 }
1676 if count < alpha {
1677 break;
1678 }
1679 }
1680 best.1
1681 })
1682 .collect();
1683
1684 if best_alpha[0] == 0 && best_alpha[1] == 0 {
1685 None
1686 } else {
1687 Some(CFLParams::from_alpha(best_alpha[0], best_alpha[1]))
1688 }
1689}
1690
1691pub fn rdo_tx_type_decision<T: Pixel>(
1702 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1703 cw: &mut ContextWriter, cw_checkpoint: &mut Option<ContextWriterCheckpoint>,
1704 mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
1705 bsize: BlockSize, tile_bo: TileBlockOffset, tx_size: TxSize, tx_set: TxSet,
1706 tx_types: &[TxType], cur_best_rd: f64,
1707) -> (TxType, f64) {
1708 let mut best_type = TxType::DCT_DCT;
1709 let mut best_rd = f64::MAX;
1710
1711 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
1712 let is_chroma_block =
1713 has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
1714
1715 let is_inter = !mode.is_intra();
1716
1717 if cw_checkpoint.is_none() {
1718 *cw_checkpoint =
1721 Some(cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling));
1722 }
1723
1724 let rdo_type = if fi.use_tx_domain_distortion {
1725 RDOType::TxDistRealRate
1726 } else {
1727 RDOType::PixelDistRealRate
1728 };
1729 let need_recon_pixel = tx_size.block_size() != bsize && !is_inter;
1730
1731 let mut first_iteration = true;
1732 for &tx_type in tx_types {
1733 if av1_tx_used[tx_set as usize][tx_type as usize] == 0 {
1735 continue;
1736 }
1737
1738 if is_inter {
1739 motion_compensate(
1740 fi, ts, cw, mode, ref_frames, mvs, bsize, tile_bo, true,
1741 );
1742 }
1743
1744 let mut wr = WriterCounter::new();
1745 let tell = wr.tell_frac();
1746 let (_, tx_dist) = if is_inter {
1747 write_tx_tree(
1748 fi,
1749 ts,
1750 cw,
1751 &mut wr,
1752 mode,
1753 0,
1754 tile_bo,
1755 bsize,
1756 tx_size,
1757 tx_type,
1758 false,
1759 true,
1760 rdo_type,
1761 need_recon_pixel,
1762 )
1763 } else {
1764 write_tx_blocks(
1765 fi,
1766 ts,
1767 cw,
1768 &mut wr,
1769 mode,
1770 mode,
1771 AngleDelta::default(),
1772 tile_bo,
1773 bsize,
1774 tx_size,
1775 tx_type,
1776 false,
1777 CFLParams::default(), true,
1779 rdo_type,
1780 need_recon_pixel,
1781 )
1782 };
1783
1784 let rate = wr.tell_frac() - tell;
1785 let distortion = if fi.use_tx_domain_distortion {
1786 compute_tx_distortion(
1787 fi,
1788 ts,
1789 bsize,
1790 is_chroma_block,
1791 tile_bo,
1792 tx_dist,
1793 false,
1794 true,
1795 )
1796 } else {
1797 compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, true)
1798 };
1799 cw.rollback(cw_checkpoint.as_ref().unwrap());
1800
1801 let rd = compute_rd_cost(fi, rate, distortion);
1802
1803 if first_iteration {
1804 if rd > cur_best_rd {
1809 break;
1810 }
1811 first_iteration = false;
1812 }
1813
1814 if rd < best_rd {
1815 best_rd = rd;
1816 best_type = tx_type;
1817 }
1818 }
1819
1820 assert!(best_rd >= 0_f64);
1821
1822 (best_type, best_rd)
1823}
1824
1825pub fn get_sub_partitions(
1826 four_partitions: &[TileBlockOffset; 4], partition: PartitionType,
1827) -> ArrayVec<TileBlockOffset, 4> {
1828 let mut partition_offsets = ArrayVec::<TileBlockOffset, 4>::new();
1829
1830 partition_offsets.push(four_partitions[0]);
1831
1832 if partition == PARTITION_NONE {
1833 return partition_offsets;
1834 }
1835 if partition == PARTITION_VERT || partition == PARTITION_SPLIT {
1836 partition_offsets.push(four_partitions[1]);
1837 };
1838 if partition == PARTITION_HORZ || partition == PARTITION_SPLIT {
1839 partition_offsets.push(four_partitions[2]);
1840 };
1841 if partition == PARTITION_SPLIT {
1842 partition_offsets.push(four_partitions[3]);
1843 };
1844
1845 partition_offsets
1846}
1847
1848#[inline(always)]
1849fn rdo_partition_none<T: Pixel>(
1850 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1851 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
1852 inter_cfg: &InterConfig, child_modes: &mut ArrayVec<PartitionParameters, 4>,
1853) -> f64 {
1854 debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height);
1855
1856 let mode = rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);
1857 let cost = mode.rd_cost;
1858
1859 child_modes.push(mode);
1860
1861 cost
1862}
1863
1864#[inline(always)]
1866fn rdo_partition_simple<T: Pixel, W: Writer>(
1867 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1868 cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
1869 bsize: BlockSize, tile_bo: TileBlockOffset, inter_cfg: &InterConfig,
1870 partition: PartitionType, rdo_type: RDOType, best_rd: f64,
1871 child_modes: &mut ArrayVec<PartitionParameters, 4>,
1872) -> Option<f64> {
1873 debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height);
1874 let subsize = bsize.subsize(partition).unwrap();
1875
1876 let cost = if bsize >= BlockSize::BLOCK_8X8 {
1877 let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
1878 let tell = w.tell_frac();
1879 cw.write_partition(w, tile_bo, partition, bsize);
1880 compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero())
1881 } else {
1882 0.0
1883 };
1884
1885 let hbsw = subsize.width_mi(); let hbsh = subsize.height_mi(); let four_partitions = [
1888 tile_bo,
1889 TileBlockOffset(BlockOffset { x: tile_bo.0.x + hbsw, y: tile_bo.0.y }),
1890 TileBlockOffset(BlockOffset { x: tile_bo.0.x, y: tile_bo.0.y + hbsh }),
1891 TileBlockOffset(BlockOffset {
1892 x: tile_bo.0.x + hbsw,
1893 y: tile_bo.0.y + hbsh,
1894 }),
1895 ];
1896
1897 let partitions = get_sub_partitions(&four_partitions, partition);
1898
1899 let mut rd_cost_sum = 0.0;
1900
1901 for offset in partitions {
1902 let hbs = subsize.width_mi() >> 1;
1903 let has_cols = offset.0.x + hbs < ts.mi_width;
1904 let has_rows = offset.0.y + hbs < ts.mi_height;
1905
1906 if has_cols && has_rows {
1907 let mode_decision =
1908 rdo_mode_decision(fi, ts, cw, subsize, offset, inter_cfg);
1909
1910 rd_cost_sum += mode_decision.rd_cost;
1911
1912 if fi.enable_early_exit && rd_cost_sum > best_rd {
1913 return None;
1914 }
1915 if subsize >= BlockSize::BLOCK_8X8 && subsize.is_sqr() {
1916 let w: &mut W =
1917 if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
1918 cw.write_partition(w, offset, PartitionType::PARTITION_NONE, subsize);
1919 }
1920 encode_block_with_modes(
1921 fi,
1922 ts,
1923 cw,
1924 w_pre_cdef,
1925 w_post_cdef,
1926 subsize,
1927 offset,
1928 &mode_decision,
1929 rdo_type,
1930 None,
1931 );
1932 child_modes.push(mode_decision);
1933 } else {
1934 return None;
1936 }
1937 }
1938
1939 Some(cost + rd_cost_sum)
1940}
1941
1942#[profiling::function]
1949pub fn rdo_partition_decision<T: Pixel, W: Writer>(
1950 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1951 cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
1952 bsize: BlockSize, tile_bo: TileBlockOffset,
1953 cached_block: &PartitionGroupParameters, partition_types: &[PartitionType],
1954 rdo_type: RDOType, inter_cfg: &InterConfig,
1955) -> PartitionGroupParameters {
1956 let mut best_partition = cached_block.part_type;
1957 let mut best_rd = cached_block.rd_cost;
1958 let mut best_pred_modes = cached_block.part_modes.clone();
1959
1960 let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
1961 let w_pre_checkpoint = w_pre_cdef.checkpoint();
1962 let w_post_checkpoint = w_post_cdef.checkpoint();
1963
1964 for &partition in partition_types {
1965 if partition == cached_block.part_type {
1967 continue;
1968 }
1969
1970 let mut child_modes = ArrayVec::<_, 4>::new();
1971
1972 let cost = match partition {
1973 PARTITION_NONE if bsize <= BlockSize::BLOCK_64X64 => {
1974 Some(rdo_partition_none(
1975 fi,
1976 ts,
1977 cw,
1978 bsize,
1979 tile_bo,
1980 inter_cfg,
1981 &mut child_modes,
1982 ))
1983 }
1984 PARTITION_SPLIT | PARTITION_HORZ | PARTITION_VERT => {
1985 rdo_partition_simple(
1986 fi,
1987 ts,
1988 cw,
1989 w_pre_cdef,
1990 w_post_cdef,
1991 bsize,
1992 tile_bo,
1993 inter_cfg,
1994 partition,
1995 rdo_type,
1996 best_rd,
1997 &mut child_modes,
1998 )
1999 }
2000 _ => {
2001 unreachable!();
2002 }
2003 };
2004
2005 if let Some(rd) = cost {
2006 if rd < best_rd {
2007 best_rd = rd;
2008 best_partition = partition;
2009 best_pred_modes.clone_from(&child_modes);
2010 }
2011 }
2012 cw.rollback(&cw_checkpoint);
2013 w_pre_cdef.rollback(&w_pre_checkpoint);
2014 w_post_cdef.rollback(&w_post_checkpoint);
2015 }
2016
2017 assert!(best_rd >= 0_f64);
2018
2019 PartitionGroupParameters {
2020 rd_cost: best_rd,
2021 part_type: best_partition,
2022 part_modes: best_pred_modes,
2023 }
2024}
2025
2026#[profiling::function]
2027fn rdo_loop_plane_error<T: Pixel>(
2028 base_sbo: TileSuperBlockOffset, offset_sbo: TileSuperBlockOffset,
2029 sb_w: usize, sb_h: usize, fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
2030 blocks: &TileBlocks<'_>, test: &Frame<T>, src: &Tile<'_, T>, pli: usize,
2031) -> ScaledDistortion {
2032 let sb_w_blocks =
2033 if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_w;
2034 let sb_h_blocks =
2035 if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_h;
2036 let mut err = Distortion::zero();
2039 for by in 0..sb_h_blocks {
2040 for bx in 0..sb_w_blocks {
2041 let loop_bo = offset_sbo.block_offset(bx << 1, by << 1);
2042 if loop_bo.0.x < blocks.cols() && loop_bo.0.y < blocks.rows() {
2043 let src_plane = &src.planes[pli];
2044 let test_plane = &test.planes[pli];
2045 let PlaneConfig { xdec, ydec, .. } = *src_plane.plane_cfg;
2046 debug_assert_eq!(xdec, test_plane.cfg.xdec);
2047 debug_assert_eq!(ydec, test_plane.cfg.ydec);
2048
2049 let frame_bo = (base_sbo + offset_sbo).block_offset(bx << 1, by << 1);
2054 let bias = distortion_scale(
2055 fi,
2056 ts.to_frame_block_offset(frame_bo),
2057 BlockSize::BLOCK_8X8,
2058 );
2059
2060 let src_region =
2061 src_plane.subregion(Area::BlockStartingAt { bo: loop_bo.0 });
2062 let test_region =
2063 test_plane.region(Area::BlockStartingAt { bo: loop_bo.0 });
2064
2065 err += if pli == 0 {
2066 RawDistortion(cdef_dist_kernel(
2070 &src_region,
2071 &test_region,
2072 8,
2073 8,
2074 fi.sequence.bit_depth,
2075 fi.cpu_feature_level,
2076 ) as u64)
2077 * bias
2078 } else {
2079 sse_wxh(
2080 &src_region,
2081 &test_region,
2082 8 >> xdec,
2083 8 >> ydec,
2084 |_, _| bias,
2085 fi.sequence.bit_depth,
2086 fi.cpu_feature_level,
2087 )
2088 };
2089 }
2090 }
2091 }
2092 err * fi.dist_scale[pli]
2093}
2094
/// Jointly searches CDEF indices and loop-restoration filters for the
/// analysis area whose top-left superblock is `base_sbo`.
///
/// The function works on scratch copies of the reconstruction: an optionally
/// deblocked copy (`rec_subset`), a CDEF output frame (`cdef_work`) and a
/// restoration output frame (`lrf_work`). It then alternates between
///
/// * trying every CDEF index for each non-skipped superblock, given the
///   restoration filters chosen so far, and
/// * trying "no filter" and each self-guided (`Sgrproj`) parameter set for
///   each non-skipped restoration unit, given the CDEF output so far,
///
/// until a CDEF pass changes nothing. Chosen CDEF indices are written to the
/// block context via `set_cdef`, and chosen restoration filters to the
/// restoration units in `ts.restoration`.
///
/// `w` is only used for rate estimation through `count_lrf_switchable`.
/// When `deblock_p` is set, deblocking levels are optimised for the scratch
/// copy and applied to it before the search.
///
/// Panics if neither CDEF nor restoration is enabled in the sequence.
#[profiling::function]
pub fn rdo_loop_decision<T: Pixel, W: Writer>(
  base_sbo: TileSuperBlockOffset, fi: &FrameInvariants<T>,
  ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, w: &mut W,
  deblock_p: bool,
) {
  // Only the first plane is processed for Cs400 chroma sampling.
  let planes = if fi.sequence.chroma_sampling == ChromaSampling::Cs400 {
    1
  } else {
    MAX_PLANES
  };
  assert!(fi.sequence.enable_cdef || fi.sequence.enable_restoration);
  // Size of the analysis area in superblocks: the largest `1 << shift`
  // across planes (presumably the size of the largest restoration unit).
  let mut sb_w = 1;
  let mut sb_h = 1;
  // Number of restoration units per plane inside the analysis area.
  let mut lru_w = [0; MAX_PLANES];
  let mut lru_h = [0; MAX_PLANES];
  for pli in 0..planes {
    let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift;
    let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift;
    if sb_w < (1 << sb_h_shift) {
      sb_w = 1 << sb_h_shift;
    }
    if sb_h < (1 << sb_v_shift) {
      sb_h = 1 << sb_v_shift;
    }
  }
  for pli in 0..planes {
    let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift;
    let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift;
    lru_w[pli] = sb_w / (1 << sb_h_shift);
    lru_h[pli] = sb_h / (1 << sb_v_shift);
  }

  // Trim the area to the superblocks that remain in the tile to the
  // right of and below `base_sbo`.
  sb_w = sb_w.min(ts.sb_width - base_sbo.0.x);
  sb_h = sb_h.min(ts.sb_height - base_sbo.0.y);

  // Luma pixels between the area origin and the frame's right/bottom edge,
  // and the pixel size of the area after clipping to that edge.
  let crop_w =
    fi.width - ((ts.sbo.0.x + base_sbo.0.x) << SUPERBLOCK_TO_PLANE_SHIFT);
  let crop_h =
    fi.height - ((ts.sbo.0.y + base_sbo.0.y) << SUPERBLOCK_TO_PLANE_SHIFT);
  let pixel_w = crop_w.min(sb_w << SUPERBLOCK_TO_PLANE_SHIFT);
  let pixel_h = crop_h.min(sb_h << SUPERBLOCK_TO_PLANE_SHIFT);

  // Upper bounds used to size the fixed arrays below.
  const MAX_SB_SHIFT: usize = 4;
  const MAX_SB_SIZE: usize = 1 << MAX_SB_SHIFT;
  const MAX_LRU_SIZE: usize = MAX_SB_SIZE;

  // Best CDEF index per superblock; -1 means nothing chosen yet.
  let mut best_index = [-1; MAX_SB_SIZE * MAX_SB_SIZE];
  // Best restoration filter per restoration unit and plane.
  let mut best_lrf =
    [[RestorationFilter::None; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];

  // Best restoration cost seen so far per unit and plane; negative means
  // no cost has been recorded yet. Later candidates must beat this value.
  let mut best_lrf_cost = [[-1.0; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];

  // Block-info view restricted to the analysis area.
  let mut tileblocks_subset = cw.bc.blocks.subregion_mut(
    base_sbo.block_offset(0, 0).0.x,
    base_sbo.block_offset(0, 0).0.y,
    sb_w << SUPERBLOCK_TO_BLOCK_SHIFT,
    sb_h << SUPERBLOCK_TO_BLOCK_SHIFT,
  );

  // A superblock is excluded from the CDEF search when every block in its
  // 16x16 block region has `skip` set.
  let mut cdef_skip = [true; MAX_SB_SIZE * MAX_SB_SIZE];
  let mut cdef_skip_all = true;
  if fi.sequence.enable_cdef {
    for sby in 0..sb_h {
      for sbx in 0..sb_w {
        let blocks = tileblocks_subset.subregion(16 * sbx, 16 * sby, 16, 16);
        let mut skip = true;
        for y in 0..blocks.rows() {
          for block in blocks[y].iter() {
            skip &= block.skip;
          }
        }
        cdef_skip[sby * MAX_SB_SIZE + sbx] = skip;
        cdef_skip_all &= skip;
      }
    }
  }

  // Same idea per restoration unit and plane, unless the `lru_on_skip`
  // speed setting asks for all units to be searched regardless.
  let mut lru_skip_all = true;
  let mut lru_skip = [[true; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
  if fi.sequence.enable_restoration {
    if fi.config.speed_settings.lru_on_skip {
      lru_skip_all = false;
      lru_skip = [[false; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
    } else {
      for pli in 0..planes {
        // Size of one restoration unit of this plane, in superblocks.
        let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift;
        let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift;
        for lru_y in 0..lru_h[pli] {
          for lru_x in 0..lru_w[pli] {
            // Superblock offset of this unit inside the analysis area.
            let loop_sbo = TileSuperBlockOffset(SuperBlockOffset {
              x: lru_x * lru_sb_w,
              y: lru_y * lru_sb_h,
            });

            // Units that do not exist keep their default `true` entry.
            if !ts.restoration.has_restoration_unit(
              base_sbo + loop_sbo,
              pli,
              false,
            ) {
              continue;
            }

            // Start and extent of the unit in block units.
            let start = loop_sbo.block_offset(0, 0).0;
            let size = TileSuperBlockOffset(SuperBlockOffset {
              x: lru_sb_w,
              y: lru_sb_h,
            })
            .block_offset(0, 0)
            .0;

            let blocks =
              tileblocks_subset.subregion(start.x, start.y, size.x, size.y);
            let mut skip = true;
            for y in 0..blocks.rows() {
              for block in blocks[y].iter() {
                skip &= block.skip;
              }
            }
            lru_skip[lru_y * MAX_LRU_SIZE + lru_x][pli] = skip;
            lru_skip_all &= skip;
          }
        }
      }
    }
  }

  // Nothing to search when every superblock and unit is skipped.
  if lru_skip_all && cdef_skip_all {
    return;
  }

  // Scratch copy of the reconstruction for the analysis area, with the
  // dimensions rounded up to a multiple of 8 pixels.
  let mut rec_subset = ts
    .rec
    .subregion(Area::BlockRect {
      bo: base_sbo.block_offset(0, 0).0,
      width: (pixel_w + 7) >> 3 << 3,
      height: (pixel_h + 7) >> 3 << 3,
    })
    .scratch_copy();

  // Matching view of the source picture; `home()` presumably re-origins the
  // view so its coordinates line up with the scratch copy — TODO confirm.
  let src_subset = ts
    .input_tile
    .subregion(Area::BlockRect {
      bo: base_sbo.block_offset(0, 0).0,
      width: (pixel_w + 7) >> 3 << 3,
      height: (pixel_h + 7) >> 3 << 3,
    })
    .home();

  if deblock_p {
    // Pick deblocking levels for this area only; they are applied to the
    // scratch copy and never written back to `ts.deblock`.
    let deblock_levels = deblock_filter_optimize(
      fi,
      &rec_subset.as_tile(),
      &src_subset,
      &tileblocks_subset.as_const(),
      crop_w,
      crop_h,
    );

    if deblock_levels[0] != 0 || deblock_levels[1] != 0 {
      // Work on a copy of the deblock state with the new levels set.
      let mut deblock_copy = *ts.deblock;
      deblock_copy.levels = deblock_levels;

      deblock_filter_frame(
        &deblock_copy,
        &mut rec_subset.as_tile_mut(),
        &tileblocks_subset.as_const(),
        crop_w,
        crop_h,
        fi.sequence.bit_depth,
        planes,
      );
    }
  }

  // CDEF output frame: starts as a clone of the (deblocked) scratch copy.
  // Absent when every superblock is skipped for CDEF.
  let mut cdef_work =
    if !cdef_skip_all { Some(rec_subset.clone()) } else { None };
  // Restoration output frame: fresh planes with the scratch copy's
  // dimensions. Absent when every restoration unit is skipped.
  let mut lrf_work = if !lru_skip_all {
    Some(Frame {
      planes: {
        let new_plane = |pli: usize| {
          let PlaneConfig { xdec, ydec, width, height, .. } =
            rec_subset.planes[pli].cfg;
          Plane::new(width, height, xdec, ydec, 0, 0)
        };
        [new_plane(0), new_plane(1), new_plane(2)]
      },
    })
  } else {
    None
  };

  // CDEF input frame plus the per-superblock analysis results, computed
  // once up front.
  let cdef_data = {
    if cdef_work.is_some() {
      Some((
        &rec_subset,
        cdef_analyze_superblock_range(
          fi,
          &rec_subset,
          &tileblocks_subset.as_const(),
          sb_w,
          sb_h,
        ),
      ))
    } else {
      None
    }
  };

  // Alternate CDEF and restoration searches. Both flags start `true` so
  // that the first iteration always runs both passes.
  let mut cdef_change = true;
  let mut lrf_change = true;
  while cdef_change || lrf_change {
    // CDEF pass: only runs when a CDEF work frame exists.
    if let (Some((rec_copy, cdef_dirs)), Some(cdef_ref)) =
      (&cdef_data, &mut cdef_work.as_mut())
    {
      for sby in 0..sb_h {
        for sbx in 0..sb_w {
          if cdef_skip[sby * MAX_SB_SIZE + sbx] {
            continue;
          }

          let prev_best_index = best_index[sby * sb_w + sbx];
          // Negative cost marks "no candidate evaluated yet".
          let mut best_cost = -1.;
          let mut best_new_index = -1i8;

          // Offset of this superblock inside the analysis area.
          let loop_sbo =
            TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby });

          // Try every CDEF index allowed by `cdef_bits`.
          for cdef_index in 0..(1 << fi.cdef_bits) {
            let mut err = ScaledDistortion::zero();
            let mut rate = 0;

            cdef_filter_superblock(
              fi,
              &rec_subset,
              &mut cdef_ref.as_tile_mut(),
              &tileblocks_subset.as_const(),
              loop_sbo,
              cdef_index,
              &cdef_dirs[sby * sb_w + sbx],
            );
            // Measure each plane after applying the restoration filter
            // currently chosen for the unit covering this superblock.
            for pli in 0..planes {
              // Superblock extent in this plane, clipped to the frame.
              let wh =
                if fi.sequence.use_128x128_superblock { 128 } else { 64 };
              let PlaneConfig { xdec, ydec, .. } = cdef_ref.planes[pli].cfg;
              let vis_width = (wh >> xdec).min(
                (crop_w >> xdec)
                  - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).x
                    as usize,
              );
              let vis_height = (wh >> ydec).min(
                (crop_h >> ydec)
                  - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).y
                    as usize,
              );
              // Requires both a restoration unit at this position and a
              // restoration work frame.
              if let (Some((lru_x, lru_y)), Some(lrf_ref)) = {
                let rp = &ts.restoration.planes[pli];
                (
                  rp.restoration_unit_offset(base_sbo, loop_sbo, false),
                  &mut lrf_work,
                )
              } {
                match best_lrf[lru_y * lru_w[pli] + lru_x][pli] {
                  RestorationFilter::None => {
                    // No restoration: measure the CDEF output directly.
                    err += rdo_loop_plane_error(
                      base_sbo,
                      loop_sbo,
                      1,
                      1,
                      fi,
                      ts,
                      &tileblocks_subset.as_const(),
                      cdef_ref,
                      &src_subset,
                      pli,
                    );
                    rate += if fi.sequence.enable_restoration {
                      cw.fc.count_lrf_switchable(
                        w,
                        &ts.restoration.as_const(),
                        best_lrf[lru_y * lru_w[pli] + lru_x][pli],
                        pli,
                      )
                    } else {
                      0
                    };
                  }
                  RestorationFilter::Sgrproj { set, xqd } => {
                    // Filter just this superblock's visible area from the
                    // CDEF output into the restoration work frame, then
                    // measure the filtered result.
                    let loop_po =
                      loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg);
                    setup_integral_image(
                      &mut ts.integral_buffer,
                      SOLVE_IMAGE_STRIDE,
                      vis_width,
                      vis_height,
                      vis_width,
                      vis_height,
                      &cdef_ref.planes[pli].slice(loop_po),
                      &cdef_ref.planes[pli].slice(loop_po),
                    );
                    sgrproj_stripe_filter(
                      set,
                      xqd,
                      fi,
                      &ts.integral_buffer,
                      SOLVE_IMAGE_STRIDE,
                      &cdef_ref.planes[pli].slice(loop_po),
                      &mut lrf_ref.planes[pli].region_mut(Area::Rect {
                        x: loop_po.x,
                        y: loop_po.y,
                        width: vis_width,
                        height: vis_height,
                      }),
                    );
                    err += rdo_loop_plane_error(
                      base_sbo,
                      loop_sbo,
                      1,
                      1,
                      fi,
                      ts,
                      &tileblocks_subset.as_const(),
                      lrf_ref,
                      &src_subset,
                      pli,
                    );
                    rate += cw.fc.count_lrf_switchable(
                      w,
                      &ts.restoration.as_const(),
                      best_lrf[lru_y * lru_w[pli] + lru_x][pli],
                      pli,
                    );
                  }
                  // This search never selects a Wiener filter.
                  RestorationFilter::Wiener { .. } => unreachable!(),
                }
              } else {
                // No restoration here: measure the CDEF output directly
                // and add no rate.
                err += rdo_loop_plane_error(
                  base_sbo,
                  loop_sbo,
                  1,
                  1,
                  fi,
                  ts,
                  &tileblocks_subset.as_const(),
                  cdef_ref,
                  &src_subset,
                  pli,
                );
              }
            }

            let cost = compute_rd_cost(fi, rate, err);
            if best_cost < 0. || cost < best_cost {
              best_cost = cost;
              best_new_index = cdef_index as i8;
            }
          }

          // Record the choice when it differs from the previous pass.
          if best_new_index != prev_best_index {
            cdef_change = true;
            best_index[sby * sb_w + sbx] = best_new_index;
            tileblocks_subset.set_cdef(loop_sbo, best_new_index as u8);
          }

          // Tile view covering the whole CDEF work frame.
          let mut cdef_ref_tm = TileMut::new(
            cdef_ref,
            TileRect {
              x: 0,
              y: 0,
              width: cdef_ref.planes[0].cfg.width,
              height: cdef_ref.planes[0].cfg.height,
            },
          );

          // Re-filter with the winning index so the CDEF work frame holds
          // the chosen result (the trial loop left the last index tried).
          cdef_filter_superblock(
            fi,
            rec_copy,
            &mut cdef_ref_tm,
            &tileblocks_subset.as_const(),
            loop_sbo,
            best_index[sby * sb_w + sbx] as u8,
            &cdef_dirs[sby * sb_w + sbx],
          );
        }
      }
    }

    // Stop once a CDEF pass leaves every index unchanged. On the first
    // iteration the flag is still `true` from its initialisation.
    if !cdef_change {
      break;
    }
    cdef_change = false;
    lrf_change = false;

    // Restoration pass: only runs when a restoration work frame exists.
    if let Some(lrf_ref) = &mut lrf_work.as_mut() {
      // Filter input is the CDEF output when there is one, otherwise the
      // (deblocked) scratch copy.
      let lrf_input = if cdef_work.is_some() {
        cdef_work.as_ref().unwrap()
      } else {
        &rec_subset
      };
      for pli in 0..planes {
        let unit_size = ts.restoration.planes[pli].rp_cfg.unit_size;
        // Size of one restoration unit of this plane, in superblocks.
        let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift;
        let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift;
        let PlaneConfig { xdec, ydec, .. } = lrf_ref.planes[pli].cfg;
        for lru_y in 0..lru_h[pli] {
          for lru_x in 0..lru_w[pli] {
            if lru_skip[lru_y * MAX_LRU_SIZE + lru_x][pli] {
              continue;
            }

            // Superblock offset of this unit inside the analysis area.
            let loop_sbo = TileSuperBlockOffset(SuperBlockOffset {
              x: lru_x * lru_sb_w,
              y: lru_y * lru_sb_h,
            });
            if ts.restoration.has_restoration_unit(
              base_sbo + loop_sbo,
              pli,
              false,
            ) {
              let src_plane = &src_subset.planes[pli];
              let lrf_in_plane = &lrf_input.planes[pli];
              let lrf_po = loop_sbo.plane_offset(src_plane.plane_cfg);
              // Start from the choice and cost kept from earlier passes.
              let mut best_new_lrf = best_lrf[lru_y * lru_w[pli] + lru_x][pli];
              let mut best_cost =
                best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli];

              // Candidate 1: no restoration filter. Distortion comes from
              // the unfiltered input; the rate is counted for the filter
              // currently held in `best_new_lrf`.
              {
                let err = rdo_loop_plane_error(
                  base_sbo,
                  loop_sbo,
                  lru_sb_w,
                  lru_sb_h,
                  fi,
                  ts,
                  &tileblocks_subset.as_const(),
                  lrf_input,
                  &src_subset,
                  pli,
                );
                let rate = cw.fc.count_lrf_switchable(
                  w,
                  &ts.restoration.as_const(),
                  best_new_lrf,
                  pli,
                );

                let cost = compute_rd_cost(fi, rate, err);
                // Accept when nothing was recorded yet or this is cheaper.
                if best_cost < 0. || cost < best_cost {
                  best_cost = cost;
                  best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost;
                  best_new_lrf = RestorationFilter::None;
                }
              }

              // Unit extent in this plane, clipped to the frame.
              let vis_width = unit_size.min(
                (crop_w >> xdec)
                  - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).x as usize,
              );
              let vis_height = unit_size.min(
                (crop_h >> ydec)
                  - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).y as usize,
              );

              // Integral image of the filter input; shared by every
              // parameter set tried below.
              setup_integral_image(
                &mut ts.integral_buffer,
                SOLVE_IMAGE_STRIDE,
                vis_width,
                vis_height,
                vis_width,
                vis_height,
                &lrf_in_plane.slice(lrf_po),
                &lrf_in_plane.slice(lrf_po),
              );

              // Candidates 2..: one solved self-guided filter per set
              // allowed by the `sgr_complexity` speed setting.
              for &set in get_sgr_sets(fi.config.speed_settings.sgr_complexity)
              {
                let (xqd0, xqd1) = sgrproj_solve(
                  set,
                  fi,
                  &ts.integral_buffer,
                  &src_plane
                    .subregion(Area::StartingAt { x: lrf_po.x, y: lrf_po.y }),
                  &lrf_in_plane.slice(lrf_po),
                  vis_width,
                  vis_height,
                );
                let current_lrf =
                  RestorationFilter::Sgrproj { set, xqd: [xqd0, xqd1] };
                // Always matches; the pattern just unpacks the parameters.
                if let RestorationFilter::Sgrproj { set, xqd } = current_lrf {
                  sgrproj_stripe_filter(
                    set,
                    xqd,
                    fi,
                    &ts.integral_buffer,
                    SOLVE_IMAGE_STRIDE,
                    &lrf_in_plane.slice(lrf_po),
                    &mut lrf_ref.planes[pli].region_mut(Area::Rect {
                      x: lrf_po.x,
                      y: lrf_po.y,
                      width: vis_width,
                      height: vis_height,
                    }),
                  );
                }
                let err = rdo_loop_plane_error(
                  base_sbo,
                  loop_sbo,
                  lru_sb_w,
                  lru_sb_h,
                  fi,
                  ts,
                  &tileblocks_subset.as_const(),
                  lrf_ref,
                  &src_subset,
                  pli,
                );
                let rate = cw.fc.count_lrf_switchable(
                  w,
                  &ts.restoration.as_const(),
                  current_lrf,
                  pli,
                );
                let cost = compute_rd_cost(fi, rate, err);
                if cost < best_cost {
                  best_cost = cost;
                  best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost;
                  best_new_lrf = current_lrf;
                }
              }

              // Publish the choice when it changed; this triggers another
              // iteration of the outer loop.
              if best_lrf[lru_y * lru_w[pli] + lru_x][pli]
                .notequal(best_new_lrf)
              {
                best_lrf[lru_y * lru_w[pli] + lru_x][pli] = best_new_lrf;
                lrf_change = true;
                if let Some(ru) = ts.restoration.planes[pli]
                  .restoration_unit_mut(base_sbo + loop_sbo)
                {
                  ru.filter = best_new_lrf;
                }
              }
            }
          }
        }
      }
    }
  }
}
2748
/// Checks that `estimate_rate` for (0, TX_4X4, 0) returns the first entry of
/// `RDO_RATE_TABLE`.
#[test]
fn estimate_rate_test() {
  let actual = estimate_rate(0, TxSize::TX_4X4, 0);
  let expected = &RDO_RATE_TABLE[0][0][0];
  assert_eq!(actual, *expected);
}