1use std::collections::VecDeque;
11use std::io::Write;
12use std::mem::MaybeUninit;
13use std::sync::Arc;
14use std::{fmt, io, mem};
15
16use arg_enum_proc_macro::ArgEnum;
17use arrayvec::*;
18use bitstream_io::{BigEndian, BitWrite, BitWriter};
19use rayon::iter::*;
20
21use crate::activity::*;
22use crate::api::*;
23use crate::cdef::*;
24use crate::context::*;
25use crate::deblock::*;
26use crate::ec::*;
27use crate::frame::*;
28use crate::header::*;
29use crate::lrf::*;
30use crate::mc::{FilterMode, MotionVector};
31use crate::me::*;
32use crate::partition::PartitionType::*;
33use crate::partition::RefType::*;
34use crate::partition::*;
35use crate::predict::{
36 luma_ac, AngleDelta, IntraEdgeFilterParameters, IntraParam, PredictionMode,
37};
38use crate::quantize::*;
39use crate::rate::{
40 QuantizerParameters, FRAME_SUBTYPE_I, FRAME_SUBTYPE_P, QSCALE,
41};
42use crate::rdo::*;
43use crate::segmentation::*;
44use crate::serialize::{Deserialize, Serialize};
45use crate::stats::EncoderStats;
46use crate::tiling::*;
47use crate::transform::*;
48use crate::util::*;
49use crate::wasm_bindgen::*;
50
/// Selects how the CDEF strengths of a frame are chosen.
///
/// `FrameInvariants::new` initialises `cdef_search_method` to
/// [`CDEFSearchMethod::PickFromQ`].
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CDEFSearchMethod {
  /// NOTE(review): presumably derives the strengths from the quantizer
  /// (see `pick_strength_from_q`) — confirm against the CDEF code.
  PickFromQ,
  /// NOTE(review): presumably a reduced search — confirm.
  FastSearch,
  /// NOTE(review): presumably an exhaustive search — confirm.
  FullSearch,
}
58
59#[inline(always)]
60fn poly2(q: f32, a: f32, b: f32, c: f32, max: i32) -> i32 {
61 clamp((q * q).mul_add(a, q.mul_add(b, c)).round() as i32, 0, max)
62}
63
/// Fixed two-byte sequence `[0x12, 0x00]`.
///
/// NOTE(review): presumably a pre-encoded temporal delimiter OBU (header byte
/// followed by a zero size) — confirm against the AV1 specification.
pub static TEMPORAL_DELIMITER: [u8; 2] = [0x12, 0x00];

const MAX_NUM_TEMPORAL_LAYERS: usize = 8;
const MAX_NUM_SPATIAL_LAYERS: usize = 4;
/// Product of the temporal and spatial layer limits; sizes the
/// per-operating-point arrays in `Sequence`.
const MAX_NUM_OPERATING_POINTS: usize =
  MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS;

/// `1 << (IMPORTANCE_BLOCK_TO_BLOCK_SHIFT + BLOCK_TO_PLANE_SHIFT)`.
///
/// NOTE(review): presumably the side length of an importance block in luma
/// pixels — confirm where the two shifts are defined.
pub const IMPORTANCE_BLOCK_SIZE: usize =
  1 << (IMPORTANCE_BLOCK_TO_BLOCK_SHIFT + BLOCK_TO_PLANE_SHIFT);
74
/// A frame held in a reference slot, together with the state stored with it.
#[derive(Debug, Clone)]
pub struct ReferenceFrame<T: Pixel> {
  /// Order hint of the frame; compared against other hints with
  /// `Sequence::get_relative_dist`.
  pub order_hint: u32,
  pub width: u32,
  pub height: u32,
  pub render_width: u32,
  pub render_height: u32,
  /// The frame data itself.
  pub frame: Arc<Frame<T>>,
  /// NOTE(review): presumably the half-resolution input plane carried over
  /// from `FrameState::input_hres` — confirm at the construction site.
  pub input_hres: Arc<Plane<T>>,
  /// NOTE(review): presumably the quarter-resolution input plane carried over
  /// from `FrameState::input_qres` — confirm at the construction site.
  pub input_qres: Arc<Plane<T>>,
  pub cdfs: CDFContext,
  pub frame_me_stats: RefMEStats,
  pub output_frameno: u64,
  pub segmentation: SegmentationState,
}
90
/// The `REF_FRAMES` reference slots and the deblocking state kept per slot.
#[derive(Debug, Clone, Default)]
pub struct ReferenceFramesSet<T: Pixel> {
  /// One optional reference frame per slot; `None` while a slot is empty.
  pub frames: [Option<Arc<ReferenceFrame<T>>>; REF_FRAMES],
  /// Deblocking state, one entry per slot.
  pub deblock: [DeblockState; REF_FRAMES],
}
96
97impl<T: Pixel> ReferenceFramesSet<T> {
98 pub fn new() -> Self {
99 Self { frames: Default::default(), deblock: Default::default() }
100 }
101}
102
/// Tuning target of the encoder.
///
/// The default is [`Tune::Psychovisual`]. `FrameInvariants::new` only enables
/// transform-domain distortion when the tune is [`Tune::Psnr`].
#[wasm_bindgen]
#[derive(
  ArgEnum, Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default,
)]
#[repr(C)]
pub enum Tune {
  Psnr,
  #[default]
  Psychovisual,
}
113
/// Value stored in `Sequence::frame_id_length` by `Sequence::new`.
const FRAME_ID_LENGTH: u32 = 15;
/// Value stored in `Sequence::delta_frame_id_length` by `Sequence::new`.
const DELTA_FRAME_ID_LENGTH: u32 = 14;
116
/// Sequence-level parameters, built from an `EncoderConfig` by
/// `Sequence::new`.
#[derive(Copy, Clone, Debug)]
pub struct Sequence {
  /// 2 for 12-bit or 4:2:2 content, 1 for 4:4:4, 0 otherwise (as computed in
  /// `Sequence::new`).
  pub profile: u8,
  /// Number of bits needed to represent the configured width.
  pub num_bits_width: u32,
  /// Number of bits needed to represent the configured height.
  pub num_bits_height: u32,
  pub bit_depth: usize,
  pub chroma_sampling: ChromaSampling,
  pub chroma_sample_position: ChromaSamplePosition,
  pub pixel_range: PixelRange,
  pub color_description: Option<ColorDescription>,
  pub mastering_display: Option<MasteringDisplay>,
  pub content_light: Option<ContentLight>,
  pub max_frame_width: u32,
  pub max_frame_height: u32,
  pub frame_id_numbers_present_flag: bool,
  pub frame_id_length: u32,
  pub delta_frame_id_length: u32,
  pub use_128x128_superblock: bool,
  /// Bit position used by `get_relative_dist` to wrap order-hint differences.
  pub order_hint_bits_minus_1: u32,
  pub force_screen_content_tools: u32,
  pub force_integer_mv: u32,
  pub still_picture: bool,
  pub reduced_still_picture_hdr: bool,
  pub enable_filter_intra: bool,
  pub enable_intra_edge_filter: bool,
  pub enable_interintra_compound: bool,
  pub enable_masked_compound: bool,
  pub enable_dual_filter: bool,
  /// When false, `get_skip_mode_allowed` returns false and
  /// `set_ref_frame_sign_bias` clears every sign bias.
  pub enable_order_hint: bool,
  pub enable_jnt_comp: bool,
  pub enable_ref_frame_mvs: bool,
  pub enable_warped_motion: bool,
  pub enable_superres: bool,
  pub enable_cdef: bool,
  pub enable_restoration: bool,
  pub enable_large_lru: bool,
  pub enable_delayed_loopfilter_rdo: bool,
  pub operating_points_cnt_minus_1: usize,
  pub operating_point_idc: [u16; MAX_NUM_OPERATING_POINTS],
  pub display_model_info_present_flag: bool,
  pub decoder_model_info_present_flag: bool,
  pub level_idx: [u8; MAX_NUM_OPERATING_POINTS],
  pub tier: [usize; MAX_NUM_OPERATING_POINTS],
  /// True when the configuration carries a non-empty film grain table.
  pub film_grain_params_present: bool,
  pub timing_info_present: bool,
  pub tiling: TilingInfo,
  pub time_base: Rational,
}
200
201impl Sequence {
202 pub fn new(config: &EncoderConfig) -> Sequence {
206 let width_bits = 32 - (config.width as u32).leading_zeros();
207 let height_bits = 32 - (config.height as u32).leading_zeros();
208 assert!(width_bits <= 16);
209 assert!(height_bits <= 16);
210
211 let profile = if config.bit_depth == 12
212 || config.chroma_sampling == ChromaSampling::Cs422
213 {
214 2
215 } else {
216 u8::from(config.chroma_sampling == ChromaSampling::Cs444)
217 };
218
219 let operating_point_idc: [u16; MAX_NUM_OPERATING_POINTS] =
220 [0; MAX_NUM_OPERATING_POINTS];
221 let level_idx: [u8; MAX_NUM_OPERATING_POINTS] =
222 if let Some(level_idx) = config.level_idx {
223 [level_idx; MAX_NUM_OPERATING_POINTS]
224 } else {
225 [31; MAX_NUM_OPERATING_POINTS]
226 };
227 let tier: [usize; MAX_NUM_OPERATING_POINTS] =
228 [0; MAX_NUM_OPERATING_POINTS];
229
230 let enable_restoration_filters = config.width >= 32 && config.height >= 32;
233 let use_128x128_superblock = false;
234
235 let frame_rate = config.frame_rate();
236 let sb_size_log2 = Self::sb_size_log2(use_128x128_superblock);
237
238 let mut tiling = TilingInfo::from_target_tiles(
239 sb_size_log2,
240 config.width,
241 config.height,
242 frame_rate,
243 TilingInfo::tile_log2(1, config.tile_cols).unwrap(),
244 TilingInfo::tile_log2(1, config.tile_rows).unwrap(),
245 config.chroma_sampling == ChromaSampling::Cs422,
246 );
247
248 if config.tiles > 0 {
249 let mut tile_rows_log2 = 0;
250 let mut tile_cols_log2 = 0;
251 while (tile_rows_log2 < tiling.max_tile_rows_log2)
252 || (tile_cols_log2 < tiling.max_tile_cols_log2)
253 {
254 tiling = TilingInfo::from_target_tiles(
255 sb_size_log2,
256 config.width,
257 config.height,
258 frame_rate,
259 tile_cols_log2,
260 tile_rows_log2,
261 config.chroma_sampling == ChromaSampling::Cs422,
262 );
263
264 if tiling.rows * tiling.cols >= config.tiles {
265 break;
266 };
267
268 if ((tiling.tile_height_sb >= tiling.tile_width_sb)
269 && (tiling.tile_rows_log2 < tiling.max_tile_rows_log2))
270 || (tile_cols_log2 >= tiling.max_tile_cols_log2)
271 {
272 tile_rows_log2 += 1;
273 } else {
274 tile_cols_log2 += 1;
275 }
276 }
277 }
278
279 Sequence {
280 tiling,
281 profile,
282 num_bits_width: width_bits,
283 num_bits_height: height_bits,
284 bit_depth: config.bit_depth,
285 chroma_sampling: config.chroma_sampling,
286 chroma_sample_position: config.chroma_sample_position,
287 pixel_range: config.pixel_range,
288 color_description: config.color_description,
289 mastering_display: config.mastering_display,
290 content_light: config.content_light,
291 max_frame_width: config.width as u32,
292 max_frame_height: config.height as u32,
293 frame_id_numbers_present_flag: false,
294 frame_id_length: FRAME_ID_LENGTH,
295 delta_frame_id_length: DELTA_FRAME_ID_LENGTH,
296 use_128x128_superblock,
297 order_hint_bits_minus_1: 5,
298 force_screen_content_tools: if config.still_picture { 2 } else { 0 },
299 force_integer_mv: 2,
300 still_picture: config.still_picture,
301 reduced_still_picture_hdr: config.still_picture,
302 enable_filter_intra: false,
303 enable_intra_edge_filter: true,
304 enable_interintra_compound: false,
305 enable_masked_compound: false,
306 enable_dual_filter: false,
307 enable_order_hint: !config.still_picture,
308 enable_jnt_comp: false,
309 enable_ref_frame_mvs: false,
310 enable_warped_motion: false,
311 enable_superres: false,
312 enable_cdef: config.speed_settings.cdef && enable_restoration_filters,
313 enable_restoration: config.speed_settings.lrf
314 && enable_restoration_filters,
315 enable_large_lru: true,
316 enable_delayed_loopfilter_rdo: true,
317 operating_points_cnt_minus_1: 0,
318 operating_point_idc,
319 display_model_info_present_flag: false,
320 decoder_model_info_present_flag: false,
321 level_idx,
322 tier,
323 film_grain_params_present: config
324 .film_grain_params
325 .as_ref()
326 .map(|entries| !entries.is_empty())
327 .unwrap_or(false),
328 timing_info_present: config.enable_timing_info,
329 time_base: config.time_base,
330 }
331 }
332
333 pub const fn get_relative_dist(&self, a: u32, b: u32) -> i32 {
334 let diff = a as i32 - b as i32;
335 let m = 1 << self.order_hint_bits_minus_1;
336 (diff & (m - 1)) - (diff & m)
337 }
338
339 pub fn get_skip_mode_allowed<T: Pixel>(
340 &self, fi: &FrameInvariants<T>, inter_cfg: &InterConfig,
341 reference_select: bool,
342 ) -> bool {
343 if fi.intra_only || !reference_select || !self.enable_order_hint {
344 return false;
345 }
346
347 let mut forward_idx: isize = -1;
348 let mut backward_idx: isize = -1;
349 let mut forward_hint = 0;
350 let mut backward_hint = 0;
351
352 for i in inter_cfg.allowed_ref_frames().iter().map(|rf| rf.to_index()) {
353 if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize] {
354 let ref_hint = rec.order_hint;
355
356 if self.get_relative_dist(ref_hint, fi.order_hint) < 0 {
357 if forward_idx < 0
358 || self.get_relative_dist(ref_hint, forward_hint) > 0
359 {
360 forward_idx = i as isize;
361 forward_hint = ref_hint;
362 }
363 } else if self.get_relative_dist(ref_hint, fi.order_hint) > 0
364 && (backward_idx < 0
365 || self.get_relative_dist(ref_hint, backward_hint) > 0)
366 {
367 backward_idx = i as isize;
368 backward_hint = ref_hint;
369 }
370 }
371 }
372
373 if forward_idx < 0 {
374 false
375 } else if backward_idx >= 0 {
376 true
378 } else {
379 let mut second_forward_idx: isize = -1;
380 let mut second_forward_hint = 0;
381
382 for i in inter_cfg.allowed_ref_frames().iter().map(|rf| rf.to_index()) {
383 if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize]
384 {
385 let ref_hint = rec.order_hint;
386
387 if self.get_relative_dist(ref_hint, forward_hint) < 0
388 && (second_forward_idx < 0
389 || self.get_relative_dist(ref_hint, second_forward_hint) > 0)
390 {
391 second_forward_idx = i as isize;
392 second_forward_hint = ref_hint;
393 }
394 }
395 }
396
397 second_forward_idx >= 0
399 }
400 }
401
/// Returns log2 of the superblock size: 7 when 128x128 superblocks are in
/// use, 6 otherwise.
#[inline(always)]
const fn sb_size_log2(use_128x128_superblock: bool) -> usize {
  if use_128x128_superblock {
    7
  } else {
    6
  }
}
406}
407
/// Mutable per-frame encoding state.
#[derive(Debug, Clone)]
pub struct FrameState<T: Pixel> {
  /// Copied from `FrameInvariants::sb_size_log2()` at construction.
  pub sb_size_log2: usize,
  /// The source frame being encoded.
  pub input: Arc<Frame<T>>,
  /// Luma plane of `input` downsampled once (empty when built through
  /// `new_with_frame_and_me_stats_and_rec`).
  pub input_hres: Arc<Plane<T>>,
  /// `input_hres` downsampled once more (empty when built through
  /// `new_with_frame_and_me_stats_and_rec`).
  pub input_qres: Arc<Plane<T>>,
  /// The reconstructed frame.
  pub rec: Arc<Frame<T>>,
  pub cdfs: CDFContext,
  pub context_update_tile_id: usize,
  pub max_tile_size_bytes: u32,
  pub deblock: DeblockState,
  pub segmentation: SegmentationState,
  pub restoration: RestorationState,
  /// Motion estimation statistics; locked for writing in
  /// `apply_tile_state_mut`.
  pub frame_me_stats: RefMEStats,
  pub enc_stats: EncoderStats,
}
426
427impl<T: Pixel> FrameState<T> {
428 pub fn new(fi: &FrameInvariants<T>) -> Self {
429 FrameState::new_with_frame(
431 fi,
432 Arc::new(Frame::new(fi.width, fi.height, fi.sequence.chroma_sampling)),
433 )
434 }
435
436 pub fn new_with_frame_and_me_stats_and_rec(
444 fi: &FrameInvariants<T>, frame: Arc<Frame<T>>, me_stats: RefMEStats,
445 rec: Arc<Frame<T>>,
446 ) -> Self {
447 let rs = RestorationState::new(fi, &frame);
448
449 let hres = Plane::new(0, 0, 0, 0, 0, 0);
450 let qres = Plane::new(0, 0, 0, 0, 0, 0);
451
452 Self {
453 sb_size_log2: fi.sb_size_log2(),
454 input: frame,
455 input_hres: Arc::new(hres),
456 input_qres: Arc::new(qres),
457 rec,
458 cdfs: CDFContext::new(0),
459 context_update_tile_id: 0,
460 max_tile_size_bytes: 0,
461 deblock: Default::default(),
462 segmentation: Default::default(),
463 restoration: rs,
464 frame_me_stats: me_stats,
465 enc_stats: Default::default(),
466 }
467 }
468
469 pub fn new_with_frame(
470 fi: &FrameInvariants<T>, frame: Arc<Frame<T>>,
471 ) -> Self {
472 let rs = RestorationState::new(fi, &frame);
473 let luma_width = frame.planes[0].cfg.width;
474 let luma_height = frame.planes[0].cfg.height;
475
476 let hres = frame.planes[0].downsampled(fi.width, fi.height);
477 let qres = hres.downsampled(fi.width, fi.height);
478
479 Self {
480 sb_size_log2: fi.sb_size_log2(),
481 input: frame,
482 input_hres: Arc::new(hres),
483 input_qres: Arc::new(qres),
484 rec: Arc::new(Frame::new(
485 luma_width,
486 luma_height,
487 fi.sequence.chroma_sampling,
488 )),
489 cdfs: CDFContext::new(0),
490 context_update_tile_id: 0,
491 max_tile_size_bytes: 0,
492 deblock: Default::default(),
493 segmentation: Default::default(),
494 restoration: rs,
495 frame_me_stats: FrameMEStats::new_arc_array(fi.w_in_b, fi.h_in_b),
496 enc_stats: Default::default(),
497 }
498 }
499
/// Runs `f` on a `TileStateMut` that starts at superblock offset (0, 0) and
/// uses the width and height of the reconstructed luma plane, returning
/// whatever `f` returns.
///
/// # Panics
///
/// Panics with "poisoned lock" if the motion estimation statistics lock
/// cannot be acquired for writing.
pub fn apply_tile_state_mut<F, R>(&mut self, f: F) -> R
where
  F: FnOnce(&mut TileStateMut<'_, T>) -> R,
{
  let PlaneConfig { width, height, .. } = self.rec.planes[0].cfg;
  let sbo_0 = PlaneSuperBlockOffset(SuperBlockOffset { x: 0, y: 0 });
  // Clone the handle first so the write guard borrows the local clone and
  // `self` stays free to be passed mutably to `TileStateMut::new`.
  let frame_me_stats = self.frame_me_stats.clone();
  let frame_me_stats = &mut *frame_me_stats.write().expect("poisoned lock");
  let ts = &mut TileStateMut::new(
    self,
    sbo_0,
    self.sb_size_log2,
    width,
    height,
    frame_me_stats,
  );

  f(ts)
}
519}
520
/// Deblocking filter parameters.
#[derive(Copy, Clone, Debug)]
pub struct DeblockState {
  /// Filter levels; the array has `MAX_PLANES + 1` entries.
  /// NOTE(review): presumably two luma levels followed by one per chroma
  /// plane — confirm against the deblocking code.
  pub levels: [u8; MAX_PLANES + 1],
  pub sharpness: u8,
  pub deltas_enabled: bool,
  pub delta_updates_enabled: bool,
  /// One delta per reference slot.
  pub ref_deltas: [i8; REF_FRAMES],
  pub mode_deltas: [i8; 2],
  pub block_deltas_enabled: bool,
  pub block_delta_shift: u8,
  pub block_delta_multi: bool,
}
533
534impl Default for DeblockState {
535 fn default() -> Self {
536 DeblockState {
537 levels: [8, 8, 4, 4],
538 sharpness: 0,
539 deltas_enabled: false, delta_updates_enabled: false,
541 ref_deltas: [1, 0, 0, 0, 0, -1, -1, -1],
542 mode_deltas: [0, 0],
543 block_deltas_enabled: false,
544 block_delta_shift: 0,
545 block_delta_multi: false,
546 }
547 }
548}
549
/// Segmentation parameters and per-segment feature data for a frame.
#[derive(Copy, Clone, Debug, Default)]
pub struct SegmentationState {
  pub enabled: bool,
  pub update_data: bool,
  pub update_map: bool,
  pub preskip: bool,
  pub last_active_segid: u8,
  /// Per-segment feature flags (8 segments, `SEG_LVL_MAX` features each).
  pub features: [[bool; SegLvl::SEG_LVL_MAX as usize]; 8],
  /// Per-segment feature values; `update_threshold` reads the
  /// `SEG_LVL_ALT_Q` entry of each segment up to `max_segment`.
  pub data: [[i16; SegLvl::SEG_LVL_MAX as usize]; 8],
  /// One threshold per pair of adjacent segments, filled in by
  /// `update_threshold`.
  pub threshold: [DistortionScale; 7],
  pub min_segment: u8,
  pub max_segment: u8,
}
563
564impl SegmentationState {
565 #[profiling::function]
566 pub fn update_threshold(&mut self, base_q_idx: u8, bd: usize) {
567 let base_ac_q = ac_q(base_q_idx, 0, bd).get() as u64;
568 let real_ac_q = ArrayVec::<_, MAX_SEGMENTS>::from_iter(
569 self.data[..=self.max_segment as usize].iter().map(|data| {
570 ac_q(base_q_idx, data[SegLvl::SEG_LVL_ALT_Q as usize] as i8, bd).get()
571 as u64
572 }),
573 );
574 self.threshold.fill(DistortionScale(0));
575 for ((q1, q2), threshold) in
576 real_ac_q.iter().skip(1).zip(&real_ac_q).zip(&mut self.threshold)
577 {
578 *threshold = DistortionScale::new(base_ac_q.pow(2), q1 * q2);
579 }
580 }
581
/// Writes the thresholds to `<data_location>/<input_frameno>-thresholds.bin`,
/// with the frame number zero-padded to ten digits.
///
/// The file holds `max_segment` as a native-endian `u64`, followed by the
/// first `max_segment` thresholds as native-endian `u32` values.
///
/// # Panics
///
/// Panics if the file cannot be written.
#[cfg(feature = "dump_lookahead_data")]
pub fn dump_threshold(
  &self, data_location: std::path::PathBuf, input_frameno: u64,
) {
  use byteorder::{NativeEndian, WriteBytesExt};

  let segment_count = self.max_segment as usize;
  let mut payload: Vec<u8> = Vec::new();
  payload.write_u64::<NativeEndian>(segment_count as u64).unwrap();
  self.threshold[..segment_count]
    .iter()
    .for_each(|scale| payload.write_u32::<NativeEndian>(scale.0).unwrap());

  let target = data_location
    .join(format!("{:010}-thresholds", input_frameno))
    .with_extension("bin");
  ::std::fs::write(target, payload).unwrap();
}
598}
599
/// Per-frame parameters that are fixed before a frame is encoded.
///
/// Created by `FrameInvariants::new` / `new_key_frame` for key frames and
/// derived from the previous coded frame by `new_inter_frame`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct FrameInvariants<T: Pixel> {
  pub sequence: Arc<Sequence>,
  pub config: Arc<EncoderConfig>,
  pub width: usize,
  pub height: usize,
  pub render_width: u32,
  pub render_height: u32,
  pub frame_size_override_flag: bool,
  pub render_and_frame_size_different: bool,
  /// Width aligned up to a multiple of 64 and divided by 64.
  pub sb_width: usize,
  /// Height aligned up to a multiple of 64 and divided by 64.
  pub sb_height: usize,
  /// Twice the width aligned up to a multiple of 8 and divided by 8.
  pub w_in_b: usize,
  /// Twice the height aligned up to a multiple of 8 and divided by 8.
  pub h_in_b: usize,
  pub input_frameno: u64,
  pub order_hint: u32,
  pub show_frame: bool,
  pub showable_frame: bool,
  pub error_resilient: bool,
  pub intra_only: bool,
  pub allow_high_precision_mv: bool,
  pub frame_type: FrameType,
  pub frame_to_show_map_idx: u32,
  pub use_reduced_tx_set: bool,
  pub reference_mode: ReferenceMode,
  pub use_prev_frame_mvs: bool,
  pub partition_range: PartitionRange,
  pub globalmv_transformation_type: [GlobalMVMode; INTER_REFS_PER_FRAME],
  pub num_tg: usize,
  pub large_scale_tile: bool,
  pub disable_cdf_update: bool,
  pub allow_screen_content_tools: u32,
  pub force_integer_mv: u32,
  pub primary_ref_frame: u32,
  /// Bit mask of the reference slots this frame refreshes.
  pub refresh_frame_flags: u32,
  pub allow_intrabc: bool,
  pub use_ref_frame_mvs: bool,
  pub is_filter_switchable: bool,
  pub is_motion_mode_switchable: bool,
  pub disable_frame_end_update_cdf: bool,
  pub allow_warped_motion: bool,
  pub cdef_search_method: CDEFSearchMethod,
  pub cdef_damping: u8,
  pub cdef_bits: u8,
  pub cdef_y_strengths: [u8; 8],
  pub cdef_uv_strengths: [u8; 8],
  pub delta_q_present: bool,
  /// For each inter reference, the index of the slot in `rec_buffer` it
  /// points to.
  pub ref_frames: [u8; INTER_REFS_PER_FRAME],
  /// Filled in by `set_ref_frame_sign_bias`.
  pub ref_frame_sign_bias: [bool; INTER_REFS_PER_FRAME],
  pub rec_buffer: ReferenceFramesSet<T>,
  pub base_q_idx: u8,
  pub dc_delta_q: [i8; 3],
  pub ac_delta_q: [i8; 3],
  pub lambda: f64,
  pub me_lambda: f64,
  pub dist_scale: [DistortionScale; 3],
  pub me_range_scale: u8,
  pub use_tx_domain_distortion: bool,
  pub use_tx_domain_rate: bool,
  pub idx_in_group_output: u64,
  pub pyramid_level: u64,
  pub enable_early_exit: bool,
  pub tx_mode_select: bool,
  pub enable_inter_txfm_split: bool,
  pub default_filter: FilterMode,
  pub enable_segmentation: bool,
  pub t35_metadata: Box<[T35]>,
  pub cpu_feature_level: crate::cpu_features::CpuFeatureLevel,

  /// Data only present for frames that are actually coded; `None` marks a
  /// show-existing-frame (see `is_show_existing_frame`).
  pub coded_frame_data: Option<CodedFrameData<T>>,
}
678
/// Frame data that exists only for frames that are actually coded; stored in
/// `FrameInvariants::coded_frame_data`.
#[derive(Debug, Clone)]
pub struct CodedFrameData<T: Pixel> {
  pub lookahead_rec_buffer: ReferenceFramesSet<T>,
  /// Frame width in importance blocks (`w_in_b / 2`).
  pub w_in_imp_b: usize,
  /// Frame height in importance blocks (`h_in_b / 2`).
  pub h_in_imp_b: usize,
  /// Starts empty in `CodedFrameData::new`.
  pub lookahead_intra_costs: Box<[u32]>,
  /// One entry per importance block, initialised to zero.
  pub block_importances: Box<[f32]>,
  /// One entry per importance block; normalised by the
  /// `compute_*_scores` methods.
  pub distortion_scales: Box<[DistortionScale]>,
  /// One entry per importance block.
  pub activity_scales: Box<[DistortionScale]>,
  pub activity_mask: ActivityMask,
  /// Written by `compute_spatiotemporal_scores` / `compute_temporal_scores`.
  pub spatiotemporal_scores: Box<[DistortionScale]>,
}
711
712impl<T: Pixel> CodedFrameData<T> {
713 pub fn new(fi: &FrameInvariants<T>) -> CodedFrameData<T> {
714 let w_in_imp_b = fi.w_in_b / 2;
716 let h_in_imp_b = fi.h_in_b / 2;
717
718 CodedFrameData {
719 lookahead_rec_buffer: ReferenceFramesSet::new(),
720 w_in_imp_b,
721 h_in_imp_b,
722 lookahead_intra_costs: Box::new([]),
724 block_importances: vec![0.; w_in_imp_b * h_in_imp_b].into_boxed_slice(),
726 distortion_scales: vec![
727 DistortionScale::default();
728 w_in_imp_b * h_in_imp_b
729 ]
730 .into_boxed_slice(),
731 activity_scales: vec![
732 DistortionScale::default();
733 w_in_imp_b * h_in_imp_b
734 ]
735 .into_boxed_slice(),
736 activity_mask: Default::default(),
737 spatiotemporal_scores: Default::default(),
738 }
739 }
740
741 #[profiling::function]
744 pub fn compute_spatiotemporal_scores(&mut self) -> i64 {
745 let mut scores = self
746 .distortion_scales
747 .iter()
748 .zip(self.activity_scales.iter())
749 .map(|(&d, &a)| d * a)
750 .collect::<Box<_>>();
751
752 let inv_mean = DistortionScale::inv_mean(&scores);
753
754 for score in scores.iter_mut() {
755 *score *= inv_mean;
756 }
757
758 for scale in self.distortion_scales.iter_mut() {
759 *scale *= inv_mean;
760 }
761
762 self.spatiotemporal_scores = scores;
763
764 inv_mean.blog64() >> 1
765 }
766
767 #[profiling::function]
770 pub fn compute_temporal_scores(&mut self) -> i64 {
771 let inv_mean = DistortionScale::inv_mean(&self.distortion_scales);
772 for scale in self.distortion_scales.iter_mut() {
773 *scale *= inv_mean;
774 }
775 self.spatiotemporal_scores.clone_from(&self.distortion_scales);
776 inv_mean.blog64() >> 1
777 }
778
/// Writes one of the scale arrays to
/// `<data_location>/<input_frameno>-<name>.bin`, with the frame number
/// zero-padded to ten digits.
///
/// The file holds `w_in_imp_b` and `h_in_imp_b` as native-endian `u64`
/// values, followed by every scale as a native-endian `u32`.
///
/// # Panics
///
/// Panics if the file cannot be written.
#[cfg(feature = "dump_lookahead_data")]
pub fn dump_scales(
  &self, data_location: std::path::PathBuf, scales: Scales,
  input_frameno: u64,
) {
  use byteorder::{NativeEndian, WriteBytesExt};

  // File-name suffix and data source for the requested array.
  let (suffix, values) = match scales {
    Scales::ActivityScales => ("activity_scales", &self.activity_scales[..]),
    Scales::DistortionScales => {
      ("distortion_scales", &self.distortion_scales[..])
    }
    Scales::SpatiotemporalScales => {
      ("spatiotemporal_scales", &self.spatiotemporal_scores[..])
    }
  };
  let file_name = format!("{:010}-{}", input_frameno, suffix);

  let mut payload: Vec<u8> = Vec::new();
  payload.write_u64::<NativeEndian>(self.w_in_imp_b as u64).unwrap();
  payload.write_u64::<NativeEndian>(self.h_in_imp_b as u64).unwrap();
  values
    .iter()
    .for_each(|scale| payload.write_u32::<NativeEndian>(scale.0).unwrap());

  let target = data_location.join(file_name).with_extension("bin");
  ::std::fs::write(target, payload).unwrap();
}
808}
809
/// Selects which scale array `CodedFrameData::dump_scales` writes out.
#[cfg(feature = "dump_lookahead_data")]
pub enum Scales {
  /// `CodedFrameData::activity_scales`.
  ActivityScales,
  /// `CodedFrameData::distortion_scales`.
  DistortionScales,
  /// `CodedFrameData::spatiotemporal_scores`.
  SpatiotemporalScales,
}
816
/// Maps a position to its pyramid level for a pyramid of depth
/// `pyramid_depth`.
///
/// The level is `pyramid_depth` minus the number of trailing zero bits of
/// `pos`, capped at `pyramid_depth`: positions that are multiples of
/// `1 << pyramid_depth` map to level 0 and odd positions map to the deepest
/// level.
pub(crate) const fn pos_to_lvl(pos: u64, pyramid_depth: u64) -> u64 {
  // OR-ing in this bit caps the trailing-zero count at `pyramid_depth`.
  let cap_bit = 1 << pyramid_depth;
  let trailing = (pos | cap_bit).trailing_zeros();
  pyramid_depth - trailing as u64
}
827
828impl<T: Pixel> FrameInvariants<T> {
829 #[allow(clippy::erasing_op, clippy::identity_op)]
830 pub fn new(config: Arc<EncoderConfig>, sequence: Arc<Sequence>) -> Self {
834 assert!(
835 sequence.bit_depth <= mem::size_of::<T>() * 8,
836 "bit depth cannot fit into u8"
837 );
838
839 let (width, height) = (config.width, config.height);
840 let frame_size_override_flag = width as u32 != sequence.max_frame_width
841 || height as u32 != sequence.max_frame_height;
842
843 let (render_width, render_height) = config.render_size();
844 let render_and_frame_size_different =
845 render_width != width || render_height != height;
846
847 let use_reduced_tx_set = config.speed_settings.transform.reduced_tx_set;
848 let use_tx_domain_distortion = config.tune == Tune::Psnr
849 && config.speed_settings.transform.tx_domain_distortion;
850 let use_tx_domain_rate = config.speed_settings.transform.tx_domain_rate;
851
852 let w_in_b = 2 * config.width.align_power_of_two_and_shift(3); let h_in_b = 2 * config.height.align_power_of_two_and_shift(3); Self {
856 width,
857 height,
858 render_width: render_width as u32,
859 render_height: render_height as u32,
860 frame_size_override_flag,
861 render_and_frame_size_different,
862 sb_width: width.align_power_of_two_and_shift(6),
863 sb_height: height.align_power_of_two_and_shift(6),
864 w_in_b,
865 h_in_b,
866 input_frameno: 0,
867 order_hint: 0,
868 show_frame: true,
869 showable_frame: !sequence.reduced_still_picture_hdr,
870 error_resilient: false,
871 intra_only: true,
872 allow_high_precision_mv: false,
873 frame_type: FrameType::KEY,
874 frame_to_show_map_idx: 0,
875 use_reduced_tx_set,
876 reference_mode: ReferenceMode::SINGLE,
877 use_prev_frame_mvs: false,
878 partition_range: config.speed_settings.partition.partition_range,
879 globalmv_transformation_type: [GlobalMVMode::IDENTITY;
880 INTER_REFS_PER_FRAME],
881 num_tg: 1,
882 large_scale_tile: false,
883 disable_cdf_update: false,
884 allow_screen_content_tools: sequence.force_screen_content_tools,
885 force_integer_mv: 1,
886 primary_ref_frame: PRIMARY_REF_NONE,
887 refresh_frame_flags: ALL_REF_FRAMES_MASK,
888 allow_intrabc: false,
889 use_ref_frame_mvs: false,
890 is_filter_switchable: false,
891 is_motion_mode_switchable: false, disable_frame_end_update_cdf: sequence.reduced_still_picture_hdr,
893 allow_warped_motion: false,
894 cdef_search_method: CDEFSearchMethod::PickFromQ,
895 cdef_damping: 3,
896 cdef_bits: 0,
897 cdef_y_strengths: [
898 0 * 4 + 0,
899 1 * 4 + 0,
900 2 * 4 + 1,
901 3 * 4 + 1,
902 5 * 4 + 2,
903 7 * 4 + 3,
904 10 * 4 + 3,
905 13 * 4 + 3,
906 ],
907 cdef_uv_strengths: [
908 0 * 4 + 0,
909 1 * 4 + 0,
910 2 * 4 + 1,
911 3 * 4 + 1,
912 5 * 4 + 2,
913 7 * 4 + 3,
914 10 * 4 + 3,
915 13 * 4 + 3,
916 ],
917 delta_q_present: false,
918 ref_frames: [0; INTER_REFS_PER_FRAME],
919 ref_frame_sign_bias: [false; INTER_REFS_PER_FRAME],
920 rec_buffer: ReferenceFramesSet::new(),
921 base_q_idx: config.quantizer as u8,
922 dc_delta_q: [0; 3],
923 ac_delta_q: [0; 3],
924 lambda: 0.0,
925 dist_scale: Default::default(),
926 me_lambda: 0.0,
927 me_range_scale: 1,
928 use_tx_domain_distortion,
929 use_tx_domain_rate,
930 idx_in_group_output: 0,
931 pyramid_level: 0,
932 enable_early_exit: true,
933 tx_mode_select: false,
934 default_filter: FilterMode::REGULAR,
935 cpu_feature_level: Default::default(),
936 enable_segmentation: config.speed_settings.segmentation
937 != SegmentationLevel::Disabled,
938 enable_inter_txfm_split: config
939 .speed_settings
940 .transform
941 .enable_inter_tx_split,
942 t35_metadata: Box::new([]),
943 sequence,
944 config,
945 coded_frame_data: None,
946 }
947 }
948
949 pub fn new_key_frame(
950 config: Arc<EncoderConfig>, sequence: Arc<Sequence>,
951 gop_input_frameno_start: u64, t35_metadata: Box<[T35]>,
952 ) -> Self {
953 let tx_mode_select = config.speed_settings.transform.rdo_tx_decision;
954 let mut fi = Self::new(config, sequence);
955 fi.input_frameno = gop_input_frameno_start;
956 fi.tx_mode_select = tx_mode_select;
957 fi.coded_frame_data = Some(CodedFrameData::new(&fi));
958 fi.t35_metadata = t35_metadata;
959 fi
960 }
961
/// Derives the invariants of the next inter frame from the previously coded
/// frame.
///
/// Returns `None` when the input frame number computed for
/// `output_frameno_in_gop` is at or past `next_keyframe_input_frameno`.
///
/// The frame becomes a SWITCH frame when `inter_cfg.switch_frame_interval`
/// is non-zero, divides `output_frameno_in_gop`, and the frame is at pyramid
/// level 0; SWITCH frames force `error_resilient` on and refresh every
/// reference slot.
pub(crate) fn new_inter_frame(
  previous_coded_fi: &Self, inter_cfg: &InterConfig,
  gop_input_frameno_start: u64, output_frameno_in_gop: u64,
  next_keyframe_input_frameno: u64, error_resilient: bool,
  t35_metadata: Box<[T35]>,
) -> Option<Self> {
  let input_frameno = inter_cfg
    .get_input_frameno(output_frameno_in_gop, gop_input_frameno_start);
  if input_frameno >= next_keyframe_input_frameno {
    // This frame would land on or after the next key frame.
    return None;
  }

  // Start from the previous frame's invariants, without its coded data.
  let mut fi = previous_coded_fi.clone_without_coded_data();
  fi.intra_only = false;
  fi.force_integer_mv = 0;
  fi.idx_in_group_output =
    inter_cfg.get_idx_in_group_output(output_frameno_in_gop);
  fi.tx_mode_select = fi.enable_inter_txfm_split;

  // Only frames that are actually coded carry coded-frame data; leaving it
  // as `None` marks a show-existing-frame (see `is_show_existing_frame`).
  let show_existing_frame =
    inter_cfg.get_show_existing_frame(fi.idx_in_group_output);
  if !show_existing_frame {
    fi.coded_frame_data.clone_from(&previous_coded_fi.coded_frame_data);
  }

  fi.order_hint =
    inter_cfg.get_order_hint(output_frameno_in_gop, fi.idx_in_group_output);

  fi.pyramid_level = inter_cfg.get_level(fi.idx_in_group_output);

  fi.frame_type = if (inter_cfg.switch_frame_interval > 0)
    && (output_frameno_in_gop % inter_cfg.switch_frame_interval == 0)
    && (fi.pyramid_level == 0)
  {
    FrameType::SWITCH
  } else {
    FrameType::INTER
  };
  fi.error_resilient =
    if fi.frame_type == FrameType::SWITCH { true } else { error_resilient };

  fi.frame_size_override_flag = if fi.frame_type == FrameType::SWITCH {
    true
  } else if fi.sequence.reduced_still_picture_hdr {
    false
  } else if fi.frame_type == FrameType::INTER
    && !fi.error_resilient
    && fi.render_and_frame_size_different
  {
    true
  } else {
    // Otherwise the flag is set only when the frame size differs from the
    // sequence maximum.
    fi.width as u32 != fi.sequence.max_frame_width
      || fi.height as u32 != fi.sequence.max_frame_height
  };

  // Reference slot used for this frame.
  let slot_idx = inter_cfg.get_slot_idx(fi.pyramid_level, fi.order_hint);
  fi.show_frame = inter_cfg.get_show_frame(fi.idx_in_group_output);
  // The supplied T.35 metadata is only kept on frames that are shown.
  fi.t35_metadata = if fi.show_frame { t35_metadata } else { Box::new([]) };
  fi.frame_to_show_map_idx = slot_idx;
  fi.refresh_frame_flags = if fi.frame_type == FrameType::SWITCH {
    ALL_REF_FRAMES_MASK
  } else if fi.is_show_existing_frame() {
    0
  } else {
    1 << slot_idx
  };

  let second_ref_frame =
    if fi.idx_in_group_output == 0 { LAST2_FRAME } else { ALTREF_FRAME };
  let ref_in_previous_group = LAST3_FRAME;

  // No primary reference for error-resilient frames or above pyramid
  // level 2.
  fi.primary_ref_frame = if fi.error_resilient || (fi.pyramid_level > 2) {
    PRIMARY_REF_NONE
  } else {
    (ref_in_previous_group.to_index()) as u32
  };

  if fi.pyramid_level == 0 {
    // Every reference defaults to the slot just before this frame's slot
    // (modulo 4); 4 is added first so the subtraction cannot underflow.
    fi.ref_frames = [
      (slot_idx + 4 - 1) as u8 % 4
    ; INTER_REFS_PER_FRAME];
    if inter_cfg.multiref {
      // With multiref the second reference is the slot two before this one.
      fi.ref_frames[second_ref_frame.to_index()] =
        (slot_idx + 4 - 2) as u8 % 4;
    }
  } else {
    debug_assert!(inter_cfg.multiref);

    // Every reference defaults to the slot of the frame whose order hint
    // lies `group_input_len >> pyramid_level` before this one.
    fi.ref_frames = [{
      let oh = fi.order_hint
        - (inter_cfg.group_input_len as u32 >> fi.pyramid_level);
      let lvl1 = pos_to_lvl(oh as u64, inter_cfg.pyramid_depth);
      if lvl1 == 0 {
        ((oh >> inter_cfg.pyramid_depth) % 4) as u8
      } else {
        3 + lvl1 as u8
      }
    }; INTER_REFS_PER_FRAME];
    // The second reference is the frame the same distance after this one.
    fi.ref_frames[second_ref_frame.to_index()] = {
      let oh = fi.order_hint
        + (inter_cfg.group_input_len as u32 >> fi.pyramid_level);
      let lvl2 = pos_to_lvl(oh as u64, inter_cfg.pyramid_depth);
      if lvl2 == 0 {
        ((oh >> inter_cfg.pyramid_depth) % 4) as u8
      } else {
        3 + lvl2 as u8
      }
    };
    // The third reference points at this frame's own slot, i.e. whatever
    // that slot holds before this frame refreshes it.
    fi.ref_frames[ref_in_previous_group.to_index()] = slot_idx as u8;
  }

  fi.set_ref_frame_sign_bias();

  fi.reference_mode = if inter_cfg.multiref && fi.idx_in_group_output != 0 {
    ReferenceMode::SELECT
  } else {
    ReferenceMode::SINGLE
  };
  fi.input_frameno = input_frameno;
  fi.me_range_scale = (inter_cfg.group_input_len >> fi.pyramid_level) as u8;

  if fi.show_frame || fi.showable_frame {
    let cur_frame_time = fi.frame_timestamp();
    // Advance the film grain seed of the segment covering this frame;
    // `Arc::make_mut` clones the config first if it is shared.
    if let Some(params) =
      Arc::make_mut(&mut fi.config).get_film_grain_mut_at(cur_frame_time)
    {
      params.random_seed = params.random_seed.wrapping_add(3248);
      // A seed that wraps to zero is replaced by the default seed.
      if params.random_seed == 0 {
        params.random_seed = DEFAULT_GRAIN_SEED;
      }
    }
  }

  Some(fi)
}
1119
1120 pub fn is_show_existing_frame(&self) -> bool {
1121 self.coded_frame_data.is_none()
1122 }
1123
1124 pub fn clone_without_coded_data(&self) -> Self {
1125 Self {
1126 coded_frame_data: None,
1127
1128 sequence: self.sequence.clone(),
1129 config: self.config.clone(),
1130 width: self.width,
1131 height: self.height,
1132 render_width: self.render_width,
1133 render_height: self.render_height,
1134 frame_size_override_flag: self.frame_size_override_flag,
1135 render_and_frame_size_different: self.render_and_frame_size_different,
1136 sb_width: self.sb_width,
1137 sb_height: self.sb_height,
1138 w_in_b: self.w_in_b,
1139 h_in_b: self.h_in_b,
1140 input_frameno: self.input_frameno,
1141 order_hint: self.order_hint,
1142 show_frame: self.show_frame,
1143 showable_frame: self.showable_frame,
1144 error_resilient: self.error_resilient,
1145 intra_only: self.intra_only,
1146 allow_high_precision_mv: self.allow_high_precision_mv,
1147 frame_type: self.frame_type,
1148 frame_to_show_map_idx: self.frame_to_show_map_idx,
1149 use_reduced_tx_set: self.use_reduced_tx_set,
1150 reference_mode: self.reference_mode,
1151 use_prev_frame_mvs: self.use_prev_frame_mvs,
1152 partition_range: self.partition_range,
1153 globalmv_transformation_type: self.globalmv_transformation_type,
1154 num_tg: self.num_tg,
1155 large_scale_tile: self.large_scale_tile,
1156 disable_cdf_update: self.disable_cdf_update,
1157 allow_screen_content_tools: self.allow_screen_content_tools,
1158 force_integer_mv: self.force_integer_mv,
1159 primary_ref_frame: self.primary_ref_frame,
1160 refresh_frame_flags: self.refresh_frame_flags,
1161 allow_intrabc: self.allow_intrabc,
1162 use_ref_frame_mvs: self.use_ref_frame_mvs,
1163 is_filter_switchable: self.is_filter_switchable,
1164 is_motion_mode_switchable: self.is_motion_mode_switchable,
1165 disable_frame_end_update_cdf: self.disable_frame_end_update_cdf,
1166 allow_warped_motion: self.allow_warped_motion,
1167 cdef_search_method: self.cdef_search_method,
1168 cdef_damping: self.cdef_damping,
1169 cdef_bits: self.cdef_bits,
1170 cdef_y_strengths: self.cdef_y_strengths,
1171 cdef_uv_strengths: self.cdef_uv_strengths,
1172 delta_q_present: self.delta_q_present,
1173 ref_frames: self.ref_frames,
1174 ref_frame_sign_bias: self.ref_frame_sign_bias,
1175 rec_buffer: self.rec_buffer.clone(),
1176 base_q_idx: self.base_q_idx,
1177 dc_delta_q: self.dc_delta_q,
1178 ac_delta_q: self.ac_delta_q,
1179 lambda: self.lambda,
1180 me_lambda: self.me_lambda,
1181 dist_scale: self.dist_scale,
1182 me_range_scale: self.me_range_scale,
1183 use_tx_domain_distortion: self.use_tx_domain_distortion,
1184 use_tx_domain_rate: self.use_tx_domain_rate,
1185 idx_in_group_output: self.idx_in_group_output,
1186 pyramid_level: self.pyramid_level,
1187 enable_early_exit: self.enable_early_exit,
1188 tx_mode_select: self.tx_mode_select,
1189 enable_inter_txfm_split: self.enable_inter_txfm_split,
1190 default_filter: self.default_filter,
1191 enable_segmentation: self.enable_segmentation,
1192 t35_metadata: self.t35_metadata.clone(),
1193 cpu_feature_level: self.cpu_feature_level,
1194 }
1195 }
1196
1197 pub fn set_ref_frame_sign_bias(&mut self) {
1198 for i in 0..INTER_REFS_PER_FRAME {
1199 self.ref_frame_sign_bias[i] = if !self.sequence.enable_order_hint {
1200 false
1201 } else if let Some(ref rec) =
1202 self.rec_buffer.frames[self.ref_frames[i] as usize]
1203 {
1204 let hint = rec.order_hint;
1205 self.sequence.get_relative_dist(hint, self.order_hint) > 0
1206 } else {
1207 false
1208 };
1209 }
1210 }
1211
1212 pub fn get_frame_subtype(&self) -> usize {
1213 if self.frame_type == FrameType::KEY {
1214 FRAME_SUBTYPE_I
1215 } else {
1216 FRAME_SUBTYPE_P + (self.pyramid_level as usize)
1217 }
1218 }
1219
1220 fn pick_strength_from_q(&mut self, qps: &QuantizerParameters) {
1221 self.cdef_damping = 3 + (self.base_q_idx >> 6);
1222 let q = bexp64(qps.log_target_q + q57(QSCALE)) as f32;
1223 let (y_f1, y_f2, uv_f1, uv_f2) = if !self.intra_only {
1225 (
1226 poly2(q, -0.0000023593946_f32, 0.0068615186_f32, 0.02709886_f32, 15),
1227 poly2(q, -0.00000057629734_f32, 0.0013993345_f32, 0.03831067_f32, 3),
1228 poly2(q, -0.0000007095069_f32, 0.0034628846_f32, 0.00887099_f32, 15),
1229 poly2(q, 0.00000023874085_f32, 0.00028223585_f32, 0.05576307_f32, 3),
1230 )
1231 } else {
1232 (
1233 poly2(q, 0.0000033731974_f32, 0.008070594_f32, 0.0187634_f32, 15),
1234 poly2(q, 0.0000029167343_f32, 0.0027798624_f32, 0.0079405_f32, 3),
1235 poly2(q, -0.0000130790995_f32, 0.012892405_f32, -0.00748388_f32, 15),
1236 poly2(q, 0.0000032651783_f32, 0.00035520183_f32, 0.00228092_f32, 3),
1237 )
1238 };
1239 self.cdef_y_strengths[0] = (y_f1 * CDEF_SEC_STRENGTHS as i32 + y_f2) as u8;
1240 self.cdef_uv_strengths[0] =
1241 (uv_f1 * CDEF_SEC_STRENGTHS as i32 + uv_f2) as u8;
1242 }
1243
1244 pub fn set_quantizers(&mut self, qps: &QuantizerParameters) {
1245 self.base_q_idx = qps.ac_qi[0];
1246 let base_q_idx = self.base_q_idx as i32;
1247 for pi in 0..3 {
1248 self.dc_delta_q[pi] = (qps.dc_qi[pi] as i32 - base_q_idx) as i8;
1249 self.ac_delta_q[pi] = (qps.ac_qi[pi] as i32 - base_q_idx) as i8;
1250 }
1251 self.lambda =
1252 qps.lambda * ((1 << (2 * (self.sequence.bit_depth - 8))) as f64);
1253 self.me_lambda = self.lambda.sqrt();
1254 self.dist_scale = qps.dist_scale.map(DistortionScale::from);
1255
1256 match self.cdef_search_method {
1257 CDEFSearchMethod::PickFromQ => {
1258 self.pick_strength_from_q(qps);
1259 }
1260 _ => unreachable!(),
1262 }
1263 }
1264
1265 #[inline(always)]
1266 pub fn sb_size_log2(&self) -> usize {
1267 self.sequence.tiling.sb_size_log2
1268 }
1269
1270 pub fn film_grain_params(&self) -> Option<&GrainTableSegment> {
1271 if !(self.show_frame || self.showable_frame) {
1272 return None;
1273 }
1274 let cur_frame_time = self.frame_timestamp();
1275 self.config.get_film_grain_at(cur_frame_time)
1276 }
1277
1278 pub fn frame_timestamp(&self) -> u64 {
1279 const TIMESTAMP_BASE_UNIT: u64 = 10_000_000;
1281
1282 self.input_frameno * TIMESTAMP_BASE_UNIT * self.sequence.time_base.num
1283 / self.sequence.time_base.den
1284 }
1285}
1286
1287impl<T: Pixel> fmt::Display for FrameInvariants<T> {
1288 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1289 write!(f, "Input Frame {} - {}", self.input_frameno, self.frame_type)
1290 }
1291}
1292
1293pub fn write_temporal_delimiter(packet: &mut dyn io::Write) -> io::Result<()> {
1297 packet.write_all(&TEMPORAL_DELIMITER)?;
1298 Ok(())
1299}
1300
1301fn write_key_frame_obus<T: Pixel>(
1302 packet: &mut dyn io::Write, fi: &FrameInvariants<T>, obu_extension: u32,
1303) -> io::Result<()> {
1304 let mut buf1 = Vec::new();
1305 let mut buf2 = Vec::new();
1306 {
1307 let mut bw2 = BitWriter::endian(&mut buf2, BigEndian);
1308 bw2.write_sequence_header_obu(fi)?;
1309 bw2.write_bit(true)?; bw2.byte_align()?;
1311 }
1312
1313 {
1314 let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
1315 bw1.write_obu_header(ObuType::OBU_SEQUENCE_HEADER, obu_extension)?;
1316 }
1317 packet.write_all(&buf1).unwrap();
1318 buf1.clear();
1319
1320 {
1321 let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
1322 bw1.write_uleb128(buf2.len() as u64)?;
1323 }
1324
1325 packet.write_all(&buf1).unwrap();
1326 buf1.clear();
1327
1328 packet.write_all(&buf2).unwrap();
1329 buf2.clear();
1330
1331 if fi.sequence.content_light.is_some() {
1332 let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
1333 bw1.write_sequence_metadata_obu(
1334 ObuMetaType::OBU_META_HDR_CLL,
1335 &fi.sequence,
1336 )?;
1337 packet.write_all(&buf1).unwrap();
1338 buf1.clear();
1339 }
1340
1341 if fi.sequence.mastering_display.is_some() {
1342 let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
1343 bw1.write_sequence_metadata_obu(
1344 ObuMetaType::OBU_META_HDR_MDCV,
1345 &fi.sequence,
1346 )?;
1347 packet.write_all(&buf1).unwrap();
1348 buf1.clear();
1349 }
1350
1351 Ok(())
1352}
1353
1354fn diff<T: Pixel>(
1356 dst: &mut [MaybeUninit<i16>], src1: &PlaneRegion<'_, T>,
1357 src2: &PlaneRegion<'_, T>,
1358) {
1359 debug_assert!(dst.len() % src1.rect().width == 0);
1360 debug_assert_eq!(src1.rows_iter().count(), src1.rect().height);
1361
1362 let width = src1.rect().width;
1363 let height = src1.rect().height;
1364
1365 if width == 0
1366 || width != src2.rect().width
1367 || height == 0
1368 || src1.rows_iter().len() != src2.rows_iter().len()
1369 {
1370 debug_assert!(false);
1371 return;
1372 }
1373
1374 for ((l, s1), s2) in
1375 dst.chunks_exact_mut(width).zip(src1.rows_iter()).zip(src2.rows_iter())
1376 {
1377 for ((r, v1), v2) in l.iter_mut().zip(s1).zip(s2) {
1378 r.write(i16::cast_from(*v1) - i16::cast_from(*v2));
1379 }
1380 }
1381}
1382
1383fn get_qidx<T: Pixel>(
1384 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, cw: &ContextWriter,
1385 tile_bo: TileBlockOffset,
1386) -> u8 {
1387 let mut qidx = fi.base_q_idx;
1388 let sidx = cw.bc.blocks[tile_bo].segmentation_idx as usize;
1389 if ts.segmentation.features[sidx][SegLvl::SEG_LVL_ALT_Q as usize] {
1390 let delta = ts.segmentation.data[sidx][SegLvl::SEG_LVL_ALT_Q as usize];
1391 qidx = clamp((qidx as i16) + delta, 0, 255) as u8;
1392 }
1393 qidx
1394}
1395
1396pub fn encode_tx_block<T: Pixel, W: Writer>(
1405 fi: &FrameInvariants<T>,
1406 ts: &mut TileStateMut<'_, T>,
1407 cw: &mut ContextWriter,
1408 w: &mut W,
1409 p: usize,
1410 tile_partition_bo: TileBlockOffset,
1412 bx: usize,
1414 by: usize,
1415 tx_bo: TileBlockOffset,
1421 mode: PredictionMode,
1422 tx_size: TxSize,
1423 tx_type: TxType,
1424 bsize: BlockSize,
1425 po: PlaneOffset,
1426 skip: bool,
1427 qidx: u8,
1428 ac: &[i16],
1429 pred_intra_param: IntraParam,
1430 rdo_type: RDOType,
1431 need_recon_pixel: bool,
1432) -> (bool, ScaledDistortion) {
1433 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[p].cfg;
1434 let tile_rect = ts.tile_rect().decimated(xdec, ydec);
1435 let area = Area::BlockRect {
1436 bo: tx_bo.0,
1437 width: tx_size.width(),
1438 height: tx_size.height(),
1439 };
1440
1441 if tx_bo.0.x >= ts.mi_width || tx_bo.0.y >= ts.mi_height {
1442 return (false, ScaledDistortion::zero());
1443 }
1444
1445 debug_assert!(tx_bo.0.x < ts.mi_width);
1446 debug_assert!(tx_bo.0.y < ts.mi_height);
1447
1448 debug_assert!(
1449 tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT
1450 );
1451
1452 let plane_bsize = bsize.subsampled_size(xdec, ydec).unwrap();
1453
1454 debug_assert!(p != 0 || !mode.is_intra() || tx_size.block_size() == plane_bsize || need_recon_pixel,
1455 "mode.is_intra()={:#?}, plane={:#?}, tx_size.block_size()={:#?}, plane_bsize={:#?}, need_recon_pixel={:#?}",
1456 mode.is_intra(), p, tx_size.block_size(), plane_bsize, need_recon_pixel);
1457
1458 let ief_params = if mode.is_directional()
1459 && fi.sequence.enable_intra_edge_filter
1460 {
1461 let (plane_xdec, plane_ydec) = if p == 0 { (0, 0) } else { (xdec, ydec) };
1462 let above_block_info =
1463 ts.above_block_info(tile_partition_bo, plane_xdec, plane_ydec);
1464 let left_block_info =
1465 ts.left_block_info(tile_partition_bo, plane_xdec, plane_ydec);
1466 Some(IntraEdgeFilterParameters::new(p, above_block_info, left_block_info))
1467 } else {
1468 None
1469 };
1470
1471 let frame_bo = ts.to_frame_block_offset(tx_bo);
1472 let rec = &mut ts.rec.planes[p];
1473
1474 if mode.is_intra() {
1475 let bit_depth = fi.sequence.bit_depth;
1476 let mut edge_buf = Aligned::uninit_array();
1477 let edge_buf = get_intra_edges(
1478 &mut edge_buf,
1479 &rec.as_const(),
1480 tile_partition_bo,
1481 bx,
1482 by,
1483 bsize,
1484 po,
1485 tx_size,
1486 bit_depth,
1487 Some(mode),
1488 fi.sequence.enable_intra_edge_filter,
1489 pred_intra_param,
1490 );
1491
1492 mode.predict_intra(
1493 tile_rect,
1494 &mut rec.subregion_mut(area),
1495 tx_size,
1496 bit_depth,
1497 ac,
1498 pred_intra_param,
1499 ief_params,
1500 &edge_buf,
1501 fi.cpu_feature_level,
1502 );
1503 }
1504
1505 if skip {
1506 return (false, ScaledDistortion::zero());
1507 }
1508
1509 let coded_tx_area = av1_get_coded_tx_size(tx_size).area();
1510 let mut residual = Aligned::<[MaybeUninit<i16>; 64 * 64]>::uninit_array();
1511 let mut coeffs = Aligned::<[MaybeUninit<T::Coeff>; 64 * 64]>::uninit_array();
1512 let mut qcoeffs =
1513 Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();
1514 let mut rcoeffs =
1515 Aligned::<[MaybeUninit<T::Coeff>; 32 * 32]>::uninit_array();
1516 let residual = &mut residual.data[..tx_size.area()];
1517 let coeffs = &mut coeffs.data[..tx_size.area()];
1518 let qcoeffs = init_slice_repeat_mut(
1519 &mut qcoeffs.data[..coded_tx_area],
1520 T::Coeff::cast_from(0),
1521 );
1522 let rcoeffs = &mut rcoeffs.data[..coded_tx_area];
1523
1524 let (visible_tx_w, visible_tx_h) = clip_visible_bsize(
1525 (fi.width + xdec) >> xdec,
1526 (fi.height + ydec) >> ydec,
1527 tx_size.block_size(),
1528 (frame_bo.0.x << MI_SIZE_LOG2) >> xdec,
1529 (frame_bo.0.y << MI_SIZE_LOG2) >> ydec,
1530 );
1531
1532 if visible_tx_w != 0 && visible_tx_h != 0 {
1533 diff(
1534 residual,
1535 &ts.input_tile.planes[p].subregion(area),
1536 &rec.subregion(area),
1537 );
1538 } else {
1539 residual.fill(MaybeUninit::new(0));
1540 }
1541 let residual = unsafe { slice_assume_init_mut(residual) };
1543
1544 forward_transform(
1545 residual,
1546 coeffs,
1547 tx_size.width(),
1548 tx_size,
1549 tx_type,
1550 fi.sequence.bit_depth,
1551 fi.cpu_feature_level,
1552 );
1553 let coeffs = unsafe { slice_assume_init_mut(coeffs) };
1555
1556 let eob = ts.qc.quantize(coeffs, qcoeffs, tx_size, tx_type);
1557
1558 let has_coeff = if need_recon_pixel || rdo_type.needs_coeff_rate() {
1559 debug_assert!((((fi.w_in_b - frame_bo.0.x) << MI_SIZE_LOG2) >> xdec) >= 4);
1560 debug_assert!((((fi.h_in_b - frame_bo.0.y) << MI_SIZE_LOG2) >> ydec) >= 4);
1561 let frame_clipped_txw: usize =
1562 (((fi.w_in_b - frame_bo.0.x) << MI_SIZE_LOG2) >> xdec)
1563 .min(tx_size.width());
1564 let frame_clipped_txh: usize =
1565 (((fi.h_in_b - frame_bo.0.y) << MI_SIZE_LOG2) >> ydec)
1566 .min(tx_size.height());
1567
1568 cw.write_coeffs_lv_map(
1569 w,
1570 p,
1571 tx_bo,
1572 qcoeffs,
1573 eob,
1574 mode,
1575 tx_size,
1576 tx_type,
1577 plane_bsize,
1578 xdec,
1579 ydec,
1580 fi.use_reduced_tx_set,
1581 frame_clipped_txw,
1582 frame_clipped_txh,
1583 )
1584 } else {
1585 true
1586 };
1587
1588 dequantize(
1590 qidx,
1591 qcoeffs,
1592 eob,
1593 rcoeffs,
1594 tx_size,
1595 fi.sequence.bit_depth,
1596 fi.dc_delta_q[p],
1597 fi.ac_delta_q[p],
1598 fi.cpu_feature_level,
1599 );
1600 let rcoeffs = unsafe { slice_assume_init_mut(rcoeffs) };
1602
1603 if eob == 0 {
1604 } else if !fi.use_tx_domain_distortion || need_recon_pixel {
1606 inverse_transform_add(
1607 rcoeffs,
1608 &mut rec.subregion_mut(area),
1609 eob,
1610 tx_size,
1611 tx_type,
1612 fi.sequence.bit_depth,
1613 fi.cpu_feature_level,
1614 );
1615 }
1616
1617 let tx_dist =
1618 if rdo_type.needs_tx_dist() && visible_tx_w != 0 && visible_tx_h != 0 {
1619 let mut raw_tx_dist = coeffs
1626 .iter()
1627 .zip(rcoeffs.iter())
1628 .map(|(&a, &b)| {
1629 let c = i32::cast_from(a) - i32::cast_from(b);
1630 (c * c) as u64
1631 })
1632 .sum::<u64>()
1633 + coeffs[rcoeffs.len()..]
1634 .iter()
1635 .map(|&a| {
1636 let c = i32::cast_from(a);
1637 (c * c) as u64
1638 })
1639 .sum::<u64>();
1640
1641 let tx_dist_scale_bits = 2 * (3 - get_log_tx_scale(tx_size));
1642 let tx_dist_scale_rounding_offset = 1 << (tx_dist_scale_bits - 1);
1643
1644 raw_tx_dist =
1645 (raw_tx_dist + tx_dist_scale_rounding_offset) >> tx_dist_scale_bits;
1646
1647 if rdo_type == RDOType::TxDistEstRate {
1648 let estimated_rate =
1650 estimate_rate(fi.base_q_idx, tx_size, raw_tx_dist);
1651 w.add_bits_frac(estimated_rate as u32);
1652 }
1653
1654 let bias = distortion_scale(fi, ts.to_frame_block_offset(tx_bo), bsize);
1655 RawDistortion::new(raw_tx_dist) * bias * fi.dist_scale[p]
1656 } else {
1657 ScaledDistortion::zero()
1658 };
1659
1660 (has_coeff, tx_dist)
1661}
1662
1663#[profiling::function]
1667pub fn motion_compensate<T: Pixel>(
1668 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1669 cw: &mut ContextWriter, luma_mode: PredictionMode, ref_frames: [RefType; 2],
1670 mvs: [MotionVector; 2], bsize: BlockSize, tile_bo: TileBlockOffset,
1671 luma_only: bool,
1672) {
1673 debug_assert!(!luma_mode.is_intra());
1674
1675 let PlaneConfig { xdec: u_xdec, ydec: u_ydec, .. } = ts.input.planes[1].cfg;
1676
1677 let num_planes = 1
1680 + if !luma_only
1681 && has_chroma(
1682 tile_bo,
1683 bsize,
1684 u_xdec,
1685 u_ydec,
1686 fi.sequence.chroma_sampling,
1687 ) {
1688 2
1689 } else {
1690 0
1691 };
1692
1693 let luma_tile_rect = ts.tile_rect();
1694 let compound_buffer = &mut ts.inter_compound_buffers;
1695 for p in 0..num_planes {
1696 let plane_bsize = if p == 0 {
1697 bsize
1698 } else {
1699 bsize.subsampled_size(u_xdec, u_ydec).unwrap()
1700 };
1701
1702 let rec = &mut ts.rec.planes[p];
1703 let po = tile_bo.plane_offset(rec.plane_cfg);
1704 let &PlaneConfig { xdec, ydec, .. } = rec.plane_cfg;
1705 let tile_rect = luma_tile_rect.decimated(xdec, ydec);
1706
1707 let area = Area::BlockStartingAt { bo: tile_bo.0 };
1708 if p > 0 && bsize < BlockSize::BLOCK_8X8 {
1709 let mut some_use_intra = false;
1710 if bsize == BlockSize::BLOCK_4X4 || bsize == BlockSize::BLOCK_4X8 {
1711 some_use_intra |=
1712 cw.bc.blocks[tile_bo.with_offset(-1, 0)].mode.is_intra();
1713 };
1714 if !some_use_intra && bsize == BlockSize::BLOCK_4X4
1715 || bsize == BlockSize::BLOCK_8X4
1716 {
1717 some_use_intra |=
1718 cw.bc.blocks[tile_bo.with_offset(0, -1)].mode.is_intra();
1719 };
1720 if !some_use_intra && bsize == BlockSize::BLOCK_4X4 {
1721 some_use_intra |=
1722 cw.bc.blocks[tile_bo.with_offset(-1, -1)].mode.is_intra();
1723 };
1724
1725 if some_use_intra {
1726 luma_mode.predict_inter(
1727 fi,
1728 tile_rect,
1729 p,
1730 po,
1731 &mut rec.subregion_mut(area),
1732 plane_bsize.width(),
1733 plane_bsize.height(),
1734 ref_frames,
1735 mvs,
1736 compound_buffer,
1737 );
1738 } else {
1739 assert!(u_xdec == 1 && u_ydec == 1);
1740 if bsize == BlockSize::BLOCK_4X4 {
1742 let mv0 = cw.bc.blocks[tile_bo.with_offset(-1, -1)].mv;
1743 let rf0 = cw.bc.blocks[tile_bo.with_offset(-1, -1)].ref_frames;
1744 let mv1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].mv;
1745 let rf1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].ref_frames;
1746 let po1 = PlaneOffset { x: po.x + 2, y: po.y };
1747 let area1 = Area::StartingAt { x: po1.x, y: po1.y };
1748 let mv2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].mv;
1749 let rf2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].ref_frames;
1750 let po2 = PlaneOffset { x: po.x, y: po.y + 2 };
1751 let area2 = Area::StartingAt { x: po2.x, y: po2.y };
1752 let po3 = PlaneOffset { x: po.x + 2, y: po.y + 2 };
1753 let area3 = Area::StartingAt { x: po3.x, y: po3.y };
1754 luma_mode.predict_inter(
1755 fi,
1756 tile_rect,
1757 p,
1758 po,
1759 &mut rec.subregion_mut(area),
1760 2,
1761 2,
1762 rf0,
1763 mv0,
1764 compound_buffer,
1765 );
1766 luma_mode.predict_inter(
1767 fi,
1768 tile_rect,
1769 p,
1770 po1,
1771 &mut rec.subregion_mut(area1),
1772 2,
1773 2,
1774 rf1,
1775 mv1,
1776 compound_buffer,
1777 );
1778 luma_mode.predict_inter(
1779 fi,
1780 tile_rect,
1781 p,
1782 po2,
1783 &mut rec.subregion_mut(area2),
1784 2,
1785 2,
1786 rf2,
1787 mv2,
1788 compound_buffer,
1789 );
1790 luma_mode.predict_inter(
1791 fi,
1792 tile_rect,
1793 p,
1794 po3,
1795 &mut rec.subregion_mut(area3),
1796 2,
1797 2,
1798 ref_frames,
1799 mvs,
1800 compound_buffer,
1801 );
1802 }
1803 if bsize == BlockSize::BLOCK_8X4 {
1804 let mv1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].mv;
1805 let rf1 = cw.bc.blocks[tile_bo.with_offset(0, -1)].ref_frames;
1806 luma_mode.predict_inter(
1807 fi,
1808 tile_rect,
1809 p,
1810 po,
1811 &mut rec.subregion_mut(area),
1812 4,
1813 2,
1814 rf1,
1815 mv1,
1816 compound_buffer,
1817 );
1818 let po3 = PlaneOffset { x: po.x, y: po.y + 2 };
1819 let area3 = Area::StartingAt { x: po3.x, y: po3.y };
1820 luma_mode.predict_inter(
1821 fi,
1822 tile_rect,
1823 p,
1824 po3,
1825 &mut rec.subregion_mut(area3),
1826 4,
1827 2,
1828 ref_frames,
1829 mvs,
1830 compound_buffer,
1831 );
1832 }
1833 if bsize == BlockSize::BLOCK_4X8 {
1834 let mv2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].mv;
1835 let rf2 = cw.bc.blocks[tile_bo.with_offset(-1, 0)].ref_frames;
1836 luma_mode.predict_inter(
1837 fi,
1838 tile_rect,
1839 p,
1840 po,
1841 &mut rec.subregion_mut(area),
1842 2,
1843 4,
1844 rf2,
1845 mv2,
1846 compound_buffer,
1847 );
1848 let po3 = PlaneOffset { x: po.x + 2, y: po.y };
1849 let area3 = Area::StartingAt { x: po3.x, y: po3.y };
1850 luma_mode.predict_inter(
1851 fi,
1852 tile_rect,
1853 p,
1854 po3,
1855 &mut rec.subregion_mut(area3),
1856 2,
1857 4,
1858 ref_frames,
1859 mvs,
1860 compound_buffer,
1861 );
1862 }
1863 }
1864 } else {
1865 luma_mode.predict_inter(
1866 fi,
1867 tile_rect,
1868 p,
1869 po,
1870 &mut rec.subregion_mut(area),
1871 plane_bsize.width(),
1872 plane_bsize.height(),
1873 ref_frames,
1874 mvs,
1875 compound_buffer,
1876 );
1877 }
1878 }
1879}
1880
1881pub fn save_block_motion<T: Pixel>(
1882 ts: &mut TileStateMut<'_, T>, bsize: BlockSize, tile_bo: TileBlockOffset,
1883 ref_frame: usize, mv: MotionVector,
1884) {
1885 let tile_me_stats = &mut ts.me_stats[ref_frame];
1886 let tile_bo_x_end = (tile_bo.0.x + bsize.width_mi()).min(ts.mi_width);
1887 let tile_bo_y_end = (tile_bo.0.y + bsize.height_mi()).min(ts.mi_height);
1888 for mi_y in tile_bo.0.y..tile_bo_y_end {
1889 for mi_x in tile_bo.0.x..tile_bo_x_end {
1890 tile_me_stats[mi_y][mi_x].mv = mv;
1891 }
1892 }
1893}
1894
1895#[profiling::function]
1896pub fn encode_block_pre_cdef<T: Pixel, W: Writer>(
1897 seq: &Sequence, ts: &TileStateMut<'_, T>, cw: &mut ContextWriter, w: &mut W,
1898 bsize: BlockSize, tile_bo: TileBlockOffset, skip: bool,
1899) -> bool {
1900 cw.bc.blocks.set_skip(tile_bo, bsize, skip);
1901 if ts.segmentation.enabled
1902 && ts.segmentation.update_map
1903 && ts.segmentation.preskip
1904 {
1905 cw.write_segmentation(
1906 w,
1907 tile_bo,
1908 bsize,
1909 false,
1910 ts.segmentation.last_active_segid,
1911 );
1912 }
1913 cw.write_skip(w, tile_bo, skip);
1914 if ts.segmentation.enabled
1915 && ts.segmentation.update_map
1916 && !ts.segmentation.preskip
1917 {
1918 cw.write_segmentation(
1919 w,
1920 tile_bo,
1921 bsize,
1922 skip,
1923 ts.segmentation.last_active_segid,
1924 );
1925 }
1926 if !skip && seq.enable_cdef {
1927 cw.bc.cdef_coded = true;
1928 }
1929 cw.bc.cdef_coded
1930}
1931
1932#[profiling::function]
1937pub fn encode_block_post_cdef<T: Pixel, W: Writer>(
1938 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1939 cw: &mut ContextWriter, w: &mut W, luma_mode: PredictionMode,
1940 chroma_mode: PredictionMode, angle_delta: AngleDelta,
1941 ref_frames: [RefType; 2], mvs: [MotionVector; 2], bsize: BlockSize,
1942 tile_bo: TileBlockOffset, skip: bool, cfl: CFLParams, tx_size: TxSize,
1943 tx_type: TxType, mode_context: usize, mv_stack: &[CandidateMV],
1944 rdo_type: RDOType, need_recon_pixel: bool,
1945 enc_stats: Option<&mut EncoderStats>,
1946) -> (bool, ScaledDistortion) {
1947 let planes =
1948 if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 };
1949 let is_inter = !luma_mode.is_intra();
1950 if is_inter {
1951 assert!(luma_mode == chroma_mode);
1952 };
1953 let sb_size = if fi.sequence.use_128x128_superblock {
1954 BlockSize::BLOCK_128X128
1955 } else {
1956 BlockSize::BLOCK_64X64
1957 };
1958 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
1959 if skip {
1960 cw.bc.reset_skip_context(
1961 tile_bo,
1962 bsize,
1963 xdec,
1964 ydec,
1965 fi.sequence.chroma_sampling,
1966 );
1967 }
1968 cw.bc.blocks.set_block_size(tile_bo, bsize);
1969 cw.bc.blocks.set_mode(tile_bo, bsize, luma_mode);
1970 cw.bc.blocks.set_tx_size(tile_bo, bsize, tx_size);
1971 cw.bc.blocks.set_ref_frames(tile_bo, bsize, ref_frames);
1972 cw.bc.blocks.set_motion_vectors(tile_bo, bsize, mvs);
1973
1974 if cw.bc.code_deltas
1976 && ts.deblock.block_deltas_enabled
1977 && (bsize < sb_size || !skip)
1978 {
1979 cw.write_block_deblock_deltas(
1980 w,
1981 tile_bo,
1982 ts.deblock.block_delta_multi,
1983 planes,
1984 );
1985 }
1986 cw.bc.code_deltas = false;
1987
1988 if fi.frame_type.has_inter() {
1989 cw.write_is_inter(w, tile_bo, is_inter);
1990 if is_inter {
1991 cw.fill_neighbours_ref_counts(tile_bo);
1992 cw.write_ref_frames(w, fi, tile_bo);
1993
1994 if luma_mode.is_compound() {
1995 cw.write_compound_mode(w, luma_mode, mode_context);
1996 } else {
1997 cw.write_inter_mode(w, luma_mode, mode_context);
1998 }
1999
2000 let ref_mv_idx = 0;
2001 let num_mv_found = mv_stack.len();
2002
2003 if luma_mode == PredictionMode::NEWMV
2004 || luma_mode == PredictionMode::NEW_NEWMV
2005 {
2006 if luma_mode == PredictionMode::NEW_NEWMV {
2007 assert!(num_mv_found >= 2);
2008 }
2009 for idx in 0..2 {
2010 if num_mv_found > idx + 1 {
2011 let drl_mode = ref_mv_idx > idx;
2012 let ctx: usize = (mv_stack[idx].weight < REF_CAT_LEVEL) as usize
2013 + (mv_stack[idx + 1].weight < REF_CAT_LEVEL) as usize;
2014 cw.write_drl_mode(w, drl_mode, ctx);
2015 if !drl_mode {
2016 break;
2017 }
2018 }
2019 }
2020 }
2021
2022 let ref_mvs = if num_mv_found > 0 {
2023 [mv_stack[ref_mv_idx].this_mv, mv_stack[ref_mv_idx].comp_mv]
2024 } else {
2025 [MotionVector::default(); 2]
2026 };
2027
2028 let mv_precision = if fi.force_integer_mv != 0 {
2029 MvSubpelPrecision::MV_SUBPEL_NONE
2030 } else if fi.allow_high_precision_mv {
2031 MvSubpelPrecision::MV_SUBPEL_HIGH_PRECISION
2032 } else {
2033 MvSubpelPrecision::MV_SUBPEL_LOW_PRECISION
2034 };
2035
2036 if luma_mode == PredictionMode::NEWMV
2037 || luma_mode == PredictionMode::NEW_NEWMV
2038 || luma_mode == PredictionMode::NEW_NEARESTMV
2039 {
2040 cw.write_mv(w, mvs[0], ref_mvs[0], mv_precision);
2041 }
2042 if luma_mode == PredictionMode::NEW_NEWMV
2043 || luma_mode == PredictionMode::NEAREST_NEWMV
2044 {
2045 cw.write_mv(w, mvs[1], ref_mvs[1], mv_precision);
2046 }
2047
2048 if luma_mode.has_nearmv() {
2049 let ref_mv_idx = luma_mode.ref_mv_idx();
2050 if luma_mode != PredictionMode::NEAR0MV {
2051 assert!(num_mv_found > ref_mv_idx);
2052 }
2053
2054 for idx in 1..3 {
2055 if num_mv_found > idx + 1 {
2056 let drl_mode = ref_mv_idx > idx;
2057 let ctx: usize = (mv_stack[idx].weight < REF_CAT_LEVEL) as usize
2058 + (mv_stack[idx + 1].weight < REF_CAT_LEVEL) as usize;
2059
2060 cw.write_drl_mode(w, drl_mode, ctx);
2061 if !drl_mode {
2062 break;
2063 }
2064 }
2065 }
2066 if mv_stack.len() > 1 {
2067 assert!(mv_stack[ref_mv_idx].this_mv.row == mvs[0].row);
2068 assert!(mv_stack[ref_mv_idx].this_mv.col == mvs[0].col);
2069 } else {
2070 assert!(0 == mvs[0].row);
2071 assert!(0 == mvs[0].col);
2072 }
2073 } else if luma_mode == PredictionMode::NEARESTMV {
2074 if mv_stack.is_empty() {
2075 assert_eq!(mvs[0].row, 0);
2076 assert_eq!(mvs[0].col, 0);
2077 } else {
2078 assert_eq!(mvs[0].row, mv_stack[0].this_mv.row);
2079 assert_eq!(mvs[0].col, mv_stack[0].this_mv.col);
2080 }
2081 }
2082 } else {
2083 cw.write_intra_mode(w, bsize, luma_mode);
2084 }
2085 } else {
2086 cw.write_intra_mode_kf(w, tile_bo, luma_mode);
2087 }
2088
2089 if !is_inter {
2090 if luma_mode.is_directional() && bsize >= BlockSize::BLOCK_8X8 {
2091 cw.write_angle_delta(w, angle_delta.y, luma_mode);
2092 }
2093 if has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling) {
2094 cw.write_intra_uv_mode(w, chroma_mode, luma_mode, bsize);
2095 if chroma_mode.is_cfl() {
2096 assert!(bsize.cfl_allowed());
2097 cw.write_cfl_alphas(w, cfl);
2098 }
2099 if chroma_mode.is_directional() && bsize >= BlockSize::BLOCK_8X8 {
2100 cw.write_angle_delta(w, angle_delta.uv, chroma_mode);
2101 }
2102 }
2103
2104 if fi.allow_screen_content_tools > 0
2105 && bsize >= BlockSize::BLOCK_8X8
2106 && bsize.width() <= 64
2107 && bsize.height() <= 64
2108 {
2109 cw.write_use_palette_mode(
2110 w,
2111 false,
2112 bsize,
2113 tile_bo,
2114 luma_mode,
2115 chroma_mode,
2116 xdec,
2117 ydec,
2118 fi.sequence.chroma_sampling,
2119 );
2120 }
2121
2122 if fi.sequence.enable_filter_intra
2123 && luma_mode == PredictionMode::DC_PRED
2124 && bsize.width() <= 32
2125 && bsize.height() <= 32
2126 {
2127 cw.write_use_filter_intra(w, false, bsize); }
2129 }
2130
2131 if fi.tx_mode_select {
2133 if bsize > BlockSize::BLOCK_4X4 && (!is_inter || !skip) {
2134 if !is_inter {
2135 cw.write_tx_size_intra(w, tile_bo, bsize, tx_size);
2136 cw.bc.update_tx_size_context(tile_bo, bsize, tx_size, false);
2137 } else {
2138 debug_assert!(fi.tx_mode_select);
2141 debug_assert!(bsize > BlockSize::BLOCK_4X4);
2142 debug_assert!(is_inter);
2143 debug_assert!(!skip);
2144 let max_tx_size = max_txsize_rect_lookup[bsize as usize];
2145 debug_assert!(max_tx_size.block_size() <= BlockSize::BLOCK_64X64);
2146
2147 let txfm_split =
2150 fi.enable_inter_txfm_split && tx_size.block_size() < bsize;
2151
2152 cw.write_tx_size_inter(
2154 w,
2155 tile_bo,
2156 bsize,
2157 max_tx_size,
2158 txfm_split,
2159 0,
2160 0,
2161 0,
2162 );
2163 }
2164 } else {
2165 debug_assert!(bsize == BlockSize::BLOCK_4X4 || (is_inter && skip));
2166 cw.bc.update_tx_size_context(tile_bo, bsize, tx_size, is_inter && skip);
2167 }
2168 }
2169
2170 if let Some(enc_stats) = enc_stats {
2171 let pixels = tx_size.area();
2172 enc_stats.block_size_counts[bsize as usize] += pixels;
2173 enc_stats.tx_type_counts[tx_type as usize] += pixels;
2174 enc_stats.luma_pred_mode_counts[luma_mode as usize] += pixels;
2175 enc_stats.chroma_pred_mode_counts[chroma_mode as usize] += pixels;
2176 if skip {
2177 enc_stats.skip_block_count += pixels;
2178 }
2179 }
2180
2181 if fi.sequence.enable_intra_edge_filter {
2182 for y in 0..bsize.height_mi() {
2183 if tile_bo.0.y + y >= ts.mi_height {
2184 continue;
2185 }
2186 for x in 0..bsize.width_mi() {
2187 if tile_bo.0.x + x >= ts.mi_width {
2188 continue;
2189 }
2190 let bi = &mut ts.coded_block_info[tile_bo.0.y + y][tile_bo.0.x + x];
2191 bi.luma_mode = luma_mode;
2192 bi.chroma_mode = chroma_mode;
2193 bi.reference_types = ref_frames;
2194 }
2195 }
2196 }
2197
2198 if is_inter {
2199 motion_compensate(
2200 fi, ts, cw, luma_mode, ref_frames, mvs, bsize, tile_bo, false,
2201 );
2202 write_tx_tree(
2203 fi,
2204 ts,
2205 cw,
2206 w,
2207 luma_mode,
2208 angle_delta.y,
2209 tile_bo,
2210 bsize,
2211 tx_size,
2212 tx_type,
2213 skip,
2214 false,
2215 rdo_type,
2216 need_recon_pixel,
2217 )
2218 } else {
2219 write_tx_blocks(
2220 fi,
2221 ts,
2222 cw,
2223 w,
2224 luma_mode,
2225 chroma_mode,
2226 angle_delta,
2227 tile_bo,
2228 bsize,
2229 tx_size,
2230 tx_type,
2231 skip,
2232 cfl,
2233 false,
2234 rdo_type,
2235 need_recon_pixel,
2236 )
2237 }
2238}
2239
2240pub fn write_tx_blocks<T: Pixel, W: Writer>(
2244 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
2245 cw: &mut ContextWriter, w: &mut W, luma_mode: PredictionMode,
2246 chroma_mode: PredictionMode, angle_delta: AngleDelta,
2247 tile_bo: TileBlockOffset, bsize: BlockSize, tx_size: TxSize,
2248 tx_type: TxType, skip: bool, cfl: CFLParams, luma_only: bool,
2249 rdo_type: RDOType, need_recon_pixel: bool,
2250) -> (bool, ScaledDistortion) {
2251 let bw = bsize.width_mi() / tx_size.width_mi();
2252 let bh = bsize.height_mi() / tx_size.height_mi();
2253 let qidx = get_qidx(fi, ts, cw, tile_bo);
2254
2255 if !skip {
2257 assert_ne!(qidx, 0);
2258 }
2259
2260 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
2261 let mut ac = Aligned::<[MaybeUninit<i16>; 32 * 32]>::uninit_array();
2262 let mut partition_has_coeff: bool = false;
2263 let mut tx_dist = ScaledDistortion::zero();
2264 let do_chroma =
2265 has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
2266
2267 ts.qc.update(
2268 qidx,
2269 tx_size,
2270 luma_mode.is_intra(),
2271 fi.sequence.bit_depth,
2272 fi.dc_delta_q[0],
2273 0,
2274 );
2275
2276 for by in 0..bh {
2277 for bx in 0..bw {
2278 let tx_bo = TileBlockOffset(BlockOffset {
2279 x: tile_bo.0.x + bx * tx_size.width_mi(),
2280 y: tile_bo.0.y + by * tx_size.height_mi(),
2281 });
2282 if tx_bo.0.x >= ts.mi_width || tx_bo.0.y >= ts.mi_height {
2283 continue;
2284 }
2285 let po = tx_bo.plane_offset(&ts.input.planes[0].cfg);
2286 let (has_coeff, dist) = encode_tx_block(
2287 fi,
2288 ts,
2289 cw,
2290 w,
2291 0,
2292 tile_bo,
2293 bx,
2294 by,
2295 tx_bo,
2296 luma_mode,
2297 tx_size,
2298 tx_type,
2299 bsize,
2300 po,
2301 skip,
2302 qidx,
2303 &[],
2304 IntraParam::AngleDelta(angle_delta.y),
2305 rdo_type,
2306 need_recon_pixel,
2307 );
2308 partition_has_coeff |= has_coeff;
2309 tx_dist += dist;
2310 }
2311 }
2312
2313 if !do_chroma
2314 || luma_only
2315 || fi.sequence.chroma_sampling == ChromaSampling::Cs400
2316 {
2317 return (partition_has_coeff, tx_dist);
2318 };
2319 debug_assert!(has_chroma(
2320 tile_bo,
2321 bsize,
2322 xdec,
2323 ydec,
2324 fi.sequence.chroma_sampling
2325 ));
2326
2327 let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec);
2328
2329 let mut bw_uv = (bw * tx_size.width_mi()) >> xdec;
2330 let mut bh_uv = (bh * tx_size.height_mi()) >> ydec;
2331
2332 if bw_uv == 0 || bh_uv == 0 {
2333 bw_uv = 1;
2334 bh_uv = 1;
2335 }
2336
2337 bw_uv /= uv_tx_size.width_mi();
2338 bh_uv /= uv_tx_size.height_mi();
2339
2340 let ac_data = if chroma_mode.is_cfl() {
2341 luma_ac(&mut ac.data, ts, tile_bo, bsize, tx_size, fi)
2342 } else {
2343 [].as_slice()
2344 };
2345
2346 let uv_tx_type = if uv_tx_size.width() >= 32 || uv_tx_size.height() >= 32 {
2347 TxType::DCT_DCT
2348 } else {
2349 uv_intra_mode_to_tx_type_context(chroma_mode)
2350 };
2351
2352 for p in 1..3 {
2353 ts.qc.update(
2354 qidx,
2355 uv_tx_size,
2356 true,
2357 fi.sequence.bit_depth,
2358 fi.dc_delta_q[p],
2359 fi.ac_delta_q[p],
2360 );
2361 let alpha = cfl.alpha(p - 1);
2362 for by in 0..bh_uv {
2363 for bx in 0..bw_uv {
2364 let tx_bo = TileBlockOffset(BlockOffset {
2365 x: tile_bo.0.x + ((bx * uv_tx_size.width_mi()) << xdec)
2366 - ((bw * tx_size.width_mi() == 1) as usize) * xdec,
2367 y: tile_bo.0.y + ((by * uv_tx_size.height_mi()) << ydec)
2368 - ((bh * tx_size.height_mi() == 1) as usize) * ydec,
2369 });
2370
2371 let mut po = tile_bo.plane_offset(&ts.input.planes[p].cfg);
2372 po.x += (bx * uv_tx_size.width()) as isize;
2373 po.y += (by * uv_tx_size.height()) as isize;
2374 let (has_coeff, dist) = encode_tx_block(
2375 fi,
2376 ts,
2377 cw,
2378 w,
2379 p,
2380 tile_bo,
2381 bx,
2382 by,
2383 tx_bo,
2384 chroma_mode,
2385 uv_tx_size,
2386 uv_tx_type,
2387 bsize,
2388 po,
2389 skip,
2390 qidx,
2391 ac_data,
2392 if chroma_mode.is_cfl() {
2393 IntraParam::Alpha(alpha)
2394 } else {
2395 IntraParam::AngleDelta(angle_delta.uv)
2396 },
2397 rdo_type,
2398 need_recon_pixel,
2399 );
2400 partition_has_coeff |= has_coeff;
2401 tx_dist += dist;
2402 }
2403 }
2404 }
2405
2406 (partition_has_coeff, tx_dist)
2407}
2408
2409pub fn write_tx_tree<T: Pixel, W: Writer>(
2410 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
2411 cw: &mut ContextWriter, w: &mut W, luma_mode: PredictionMode,
2412 angle_delta_y: i8, tile_bo: TileBlockOffset, bsize: BlockSize,
2413 tx_size: TxSize, tx_type: TxType, skip: bool, luma_only: bool,
2414 rdo_type: RDOType, need_recon_pixel: bool,
2415) -> (bool, ScaledDistortion) {
2416 if skip {
2417 return (false, ScaledDistortion::zero());
2418 }
2419 let bw = bsize.width_mi() / tx_size.width_mi();
2420 let bh = bsize.height_mi() / tx_size.height_mi();
2421 let qidx = get_qidx(fi, ts, cw, tile_bo);
2422
2423 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
2424 let ac = &[0i16; 0];
2425 let mut partition_has_coeff: bool = false;
2426 let mut tx_dist = ScaledDistortion::zero();
2427
2428 ts.qc.update(
2429 qidx,
2430 tx_size,
2431 luma_mode.is_intra(),
2432 fi.sequence.bit_depth,
2433 fi.dc_delta_q[0],
2434 0,
2435 );
2436
2437 for by in 0..bh {
2441 for bx in 0..bw {
2442 let tx_bo = TileBlockOffset(BlockOffset {
2443 x: tile_bo.0.x + bx * tx_size.width_mi(),
2444 y: tile_bo.0.y + by * tx_size.height_mi(),
2445 });
2446 if tx_bo.0.x >= ts.mi_width || tx_bo.0.y >= ts.mi_height {
2447 continue;
2448 }
2449
2450 let po = tx_bo.plane_offset(&ts.input.planes[0].cfg);
2451 let (has_coeff, dist) = encode_tx_block(
2452 fi,
2453 ts,
2454 cw,
2455 w,
2456 0,
2457 tile_bo,
2458 0,
2459 0,
2460 tx_bo,
2461 luma_mode,
2462 tx_size,
2463 tx_type,
2464 bsize,
2465 po,
2466 skip,
2467 qidx,
2468 ac,
2469 IntraParam::AngleDelta(angle_delta_y),
2470 rdo_type,
2471 need_recon_pixel,
2472 );
2473 partition_has_coeff |= has_coeff;
2474 tx_dist += dist;
2475 }
2476 }
2477
2478 if !has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling)
2479 || luma_only
2480 || fi.sequence.chroma_sampling == ChromaSampling::Cs400
2481 {
2482 return (partition_has_coeff, tx_dist);
2483 };
2484 debug_assert!(has_chroma(
2485 tile_bo,
2486 bsize,
2487 xdec,
2488 ydec,
2489 fi.sequence.chroma_sampling
2490 ));
2491
2492 let max_tx_size = max_txsize_rect_lookup[bsize as usize];
2493 debug_assert!(max_tx_size.block_size() <= BlockSize::BLOCK_64X64);
2494 let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec);
2495
2496 let mut bw_uv = max_tx_size.width_mi() >> xdec;
2497 let mut bh_uv = max_tx_size.height_mi() >> ydec;
2498
2499 if bw_uv == 0 || bh_uv == 0 {
2500 bw_uv = 1;
2501 bh_uv = 1;
2502 }
2503
2504 bw_uv /= uv_tx_size.width_mi();
2505 bh_uv /= uv_tx_size.height_mi();
2506
2507 let uv_tx_type = if partition_has_coeff {
2508 tx_type.uv_inter(uv_tx_size)
2509 } else {
2510 TxType::DCT_DCT
2511 };
2512
2513 for p in 1..3 {
2514 ts.qc.update(
2515 qidx,
2516 uv_tx_size,
2517 false,
2518 fi.sequence.bit_depth,
2519 fi.dc_delta_q[p],
2520 fi.ac_delta_q[p],
2521 );
2522
2523 for by in 0..bh_uv {
2524 for bx in 0..bw_uv {
2525 let tx_bo = TileBlockOffset(BlockOffset {
2526 x: tile_bo.0.x + ((bx * uv_tx_size.width_mi()) << xdec)
2527 - (max_tx_size.width_mi() == 1) as usize * xdec,
2528 y: tile_bo.0.y + ((by * uv_tx_size.height_mi()) << ydec)
2529 - (max_tx_size.height_mi() == 1) as usize * ydec,
2530 });
2531
2532 let mut po = tile_bo.plane_offset(&ts.input.planes[p].cfg);
2533 po.x += (bx * uv_tx_size.width()) as isize;
2534 po.y += (by * uv_tx_size.height()) as isize;
2535 let (has_coeff, dist) = encode_tx_block(
2536 fi,
2537 ts,
2538 cw,
2539 w,
2540 p,
2541 tile_bo,
2542 bx,
2543 by,
2544 tx_bo,
2545 luma_mode,
2546 uv_tx_size,
2547 uv_tx_type,
2548 bsize,
2549 po,
2550 skip,
2551 qidx,
2552 ac,
2553 IntraParam::AngleDelta(angle_delta_y),
2554 rdo_type,
2555 need_recon_pixel,
2556 );
2557 partition_has_coeff |= has_coeff;
2558 tx_dist += dist;
2559 }
2560 }
2561 }
2562
2563 (partition_has_coeff, tx_dist)
2564}
2565
2566#[profiling::function]
2567pub fn encode_block_with_modes<T: Pixel, W: Writer>(
2568 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
2569 cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
2570 bsize: BlockSize, tile_bo: TileBlockOffset,
2571 mode_decision: &PartitionParameters, rdo_type: RDOType,
2572 enc_stats: Option<&mut EncoderStats>,
2573) {
2574 let (mode_luma, mode_chroma) =
2575 (mode_decision.pred_mode_luma, mode_decision.pred_mode_chroma);
2576 let cfl = mode_decision.pred_cfl_params;
2577 let ref_frames = mode_decision.ref_frames;
2578 let mvs = mode_decision.mvs;
2579 let mut skip = mode_decision.skip;
2580 let mut cdef_coded = cw.bc.cdef_coded;
2581
2582 cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, mode_decision.sidx);
2585
2586 let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
2587 let is_compound = ref_frames[1] != NONE_FRAME;
2588 let mode_context =
2589 cw.find_mvrefs(tile_bo, ref_frames, &mut mv_stack, bsize, fi, is_compound);
2590
2591 let (tx_size, tx_type) = if !mode_decision.skip && !mode_decision.has_coeff {
2592 skip = true;
2593 rdo_tx_size_type(
2594 fi, ts, cw, bsize, tile_bo, mode_luma, ref_frames, mvs, skip,
2595 )
2596 } else {
2597 (mode_decision.tx_size, mode_decision.tx_type)
2598 };
2599
2600 cdef_coded = encode_block_pre_cdef(
2601 &fi.sequence,
2602 ts,
2603 cw,
2604 if cdef_coded { w_post_cdef } else { w_pre_cdef },
2605 bsize,
2606 tile_bo,
2607 skip,
2608 );
2609 encode_block_post_cdef(
2610 fi,
2611 ts,
2612 cw,
2613 if cdef_coded { w_post_cdef } else { w_pre_cdef },
2614 mode_luma,
2615 mode_chroma,
2616 mode_decision.angle_delta,
2617 ref_frames,
2618 mvs,
2619 bsize,
2620 tile_bo,
2621 skip,
2622 cfl,
2623 tx_size,
2624 tx_type,
2625 mode_context,
2626 &mv_stack,
2627 rdo_type,
2628 true,
2629 enc_stats,
2630 );
2631}
2632
/// Recursively searches the partitioning of the block at `tile_bo` by
/// rate-distortion cost and leaves the winning choice coded in `cw` and the
/// two writers.
///
/// `PARTITION_NONE` is evaluated first (unless a split is mandatory), then
/// each candidate sub-partition is evaluated by recursing into its child
/// blocks. Before every candidate the context writer and both symbol writers
/// are rolled back to the checkpoints taken on entry.
///
/// `ref_rd_cost` is an upper bound supplied by the caller: when
/// `fi.enable_early_exit` is set and a split is not mandatory, a candidate is
/// abandoned as soon as its accumulated cost reaches `ref_rd_cost` or the
/// best cost found so far.
///
/// Returns the best cost and partition type. `part_modes` is only kept in
/// the return value when the winner is `PARTITION_NONE`. If `tile_bo` lies
/// outside the tile, nothing is coded and the result has
/// `PARTITION_INVALID` with cost `f64::MAX`.
///
/// # Panics
///
/// Panics if `fi.partition_range.max` exceeds 64x64, if a sub-8x8 block is
/// found splittable, if a child reports a negative cost, or if no valid
/// partition was selected.
#[profiling::function]
fn encode_partition_bottomup<T: Pixel, W: Writer>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
  bsize: BlockSize, tile_bo: TileBlockOffset, ref_rd_cost: f64,
  inter_cfg: &InterConfig, enc_stats: &mut EncoderStats,
) -> PartitionGroupParameters {
  let rdo_type = RDOType::PixelDistRealRate;
  let mut rd_cost = f64::MAX;
  let mut best_rd = f64::MAX;
  let mut rdo_output = PartitionGroupParameters {
    rd_cost,
    part_type: PartitionType::PARTITION_INVALID,
    part_modes: ArrayVec::new(),
  };

  // Blocks starting outside the tile are reported as invalid.
  if tile_bo.0.x >= ts.mi_width || tile_bo.0.y >= ts.mi_height {
    return rdo_output;
  }

  let is_square = bsize.is_sqr();
  // Half the block width, in mode-info units; used for both axes below.
  let hbs = bsize.width_mi() / 2;
  // Whether the second half of the block still starts inside the tile.
  let has_cols = tile_bo.0.x + hbs < ts.mi_width;
  let has_rows = tile_bo.0.y + hbs < ts.mi_height;
  // Whether the block extends past the right / bottom tile edge.
  let is_straddle_x = tile_bo.0.x + bsize.width_mi() > ts.mi_width;
  let is_straddle_y = tile_bo.0.y + bsize.height_mi() > ts.mi_height;

  assert!(fi.partition_range.max <= BlockSize::BLOCK_64X64);

  // Square blocks above the allowed maximum, or whose second half falls
  // outside the tile, cannot be coded whole.
  let must_split =
    is_square && (bsize > fi.partition_range.max || !has_cols || !has_rows);

  // Blocks of 8x8 and below are never split in inter frames unless the
  // chroma sampling is 4:2:0.
  let can_split = if fi.frame_type.has_inter() &&
    fi.sequence.chroma_sampling != ChromaSampling::Cs420 &&
    bsize <= BlockSize::BLOCK_8X8 {
    false
  } else {
    (bsize > fi.partition_range.min && is_square) || must_split
  };

  assert!(bsize >= BlockSize::BLOCK_8X8 || !can_split);

  let mut best_partition = PartitionType::PARTITION_INVALID;

  // State to return to before each candidate partition is tried.
  let cw_checkpoint = cw.checkpoint(&tile_bo, fi.sequence.chroma_sampling);
  let w_pre_checkpoint = w_pre_cdef.checkpoint();
  let w_post_checkpoint = w_post_cdef.checkpoint();

  // Code the whole block as PARTITION_NONE.
  if !must_split {
    // Cost of signalling PARTITION_NONE; only square blocks of at least 8x8
    // write a partition symbol.
    let cost = if bsize >= BlockSize::BLOCK_8X8 && is_square {
      let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
      let tell = w.tell_frac();
      cw.write_partition(w, tile_bo, PartitionType::PARTITION_NONE, bsize);
      compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero())
    } else {
      0.0
    };

    let mode_decision =
      rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);

    if !mode_decision.pred_mode_luma.is_intra() {
      // Record the motion of the first reference for this block.
      save_block_motion(
        ts,
        mode_decision.bsize,
        mode_decision.bo,
        mode_decision.ref_frames[0].to_index(),
        mode_decision.mvs[0],
      );
    }

    rd_cost = mode_decision.rd_cost + cost;

    best_partition = PartitionType::PARTITION_NONE;
    best_rd = rd_cost;
    rdo_output.part_modes.push(mode_decision.clone());

    // With no alternative to compare against, code the block right away.
    if !can_split {
      encode_block_with_modes(
        fi,
        ts,
        cw,
        w_pre_cdef,
        w_post_cdef,
        bsize,
        tile_bo,
        &mode_decision,
        rdo_type,
        Some(enc_stats),
      );
    }
  }

  let mut early_exit = false;

  // Evaluate every partition type other than PARTITION_NONE.
  if can_split {
    debug_assert!(is_square);

    // Rectangular partitions are only candidates for small enough blocks or
    // blocks straddling a tile edge; PARTITION_SPLIT always is.
    let mut partition_types = ArrayVec::<PartitionType, 3>::new();
    if bsize
      <= fi.config.speed_settings.partition.non_square_partition_max_threshold
      || is_straddle_x
      || is_straddle_y
    {
      if has_cols {
        partition_types.push(PartitionType::PARTITION_HORZ);
      }
      if !(fi.sequence.chroma_sampling == ChromaSampling::Cs422) && has_rows {
        partition_types.push(PartitionType::PARTITION_VERT);
      }
    }
    partition_types.push(PartitionType::PARTITION_SPLIT);

    for partition in partition_types {
      // (!has_rows || !has_cols) --> must_split
      debug_assert!((has_rows && has_cols) || must_split);
      // (!has_rows && has_cols) --> partition != PARTITION_VERT
      debug_assert!(
        has_rows || !has_cols || (partition != PartitionType::PARTITION_VERT)
      );
      // (has_rows && !has_cols) --> partition != PARTITION_HORZ
      debug_assert!(
        !has_rows || has_cols || (partition != PartitionType::PARTITION_HORZ)
      );
      // (!has_rows && !has_cols) --> partition == PARTITION_SPLIT
      debug_assert!(
        has_rows || has_cols || (partition == PartitionType::PARTITION_SPLIT)
      );

      // Discard whatever the previous candidate coded.
      cw.rollback(&cw_checkpoint);
      w_pre_cdef.rollback(&w_pre_checkpoint);
      w_post_cdef.rollback(&w_post_checkpoint);

      let subsize = bsize.subsize(partition).unwrap();
      // Width and height of a child block, in mode-info units.
      let hbsw = subsize.width_mi();
      let hbsh = subsize.height_mi();
      let mut child_modes = ArrayVec::<PartitionParameters, 4>::new();
      rd_cost = 0.0;

      // Start the candidate's cost with the cost of its partition symbol.
      if bsize >= BlockSize::BLOCK_8X8 {
        let w: &mut W =
          if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
        let tell = w.tell_frac();
        cw.write_partition(w, tile_bo, partition, bsize);
        rd_cost =
          compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero());
      }

      // Offsets of the four quadrants; `get_sub_partitions` selects the
      // ones that apply to `partition`.
      let four_partitions = [
        tile_bo,
        TileBlockOffset(BlockOffset { x: tile_bo.0.x + hbsw, y: tile_bo.0.y }),
        TileBlockOffset(BlockOffset { x: tile_bo.0.x, y: tile_bo.0.y + hbsh }),
        TileBlockOffset(BlockOffset {
          x: tile_bo.0.x + hbsw,
          y: tile_bo.0.y + hbsh,
        }),
      ];
      let partitions = get_sub_partitions(&four_partitions, partition);

      early_exit = false;
      for offset in partitions {
        // Children that start outside the tile contribute nothing.
        if offset.0.x >= ts.mi_width || offset.0.y >= ts.mi_height {
          continue;
        }
        let child_rdo_output = encode_partition_bottomup(
          fi,
          ts,
          cw,
          w_pre_cdef,
          w_post_cdef,
          subsize,
          offset,
          best_rd,
          inter_cfg,
          enc_stats,
        );
        let cost = child_rdo_output.rd_cost;
        assert!(cost >= 0.0);

        // A child cost of f64::MAX is not added to the running total.
        if cost != f64::MAX {
          rd_cost += cost;
          // Give up on this candidate once it can no longer beat the best
          // cost known here or the caller's bound.
          if !must_split
            && fi.enable_early_exit
            && (rd_cost >= best_rd || rd_cost >= ref_rd_cost)
          {
            assert!(cost != f64::MAX);
            early_exit = true;
            break;
          } else if partition != PartitionType::PARTITION_SPLIT {
            // Remember the child's mode so the block can be re-coded if
            // this rectangular partition wins.
            child_modes.push(child_rdo_output.part_modes[0].clone());
          }
        }
      }

      if !early_exit && rd_cost < best_rd {
        best_rd = rd_cost;
        best_partition = partition;
        if partition != PartitionType::PARTITION_SPLIT {
          assert!(!child_modes.is_empty());
          rdo_output.part_modes = child_modes;
        }
      }
    }

    debug_assert!(
      early_exit || best_partition != PartitionType::PARTITION_INVALID
    );

    // PARTITION_SPLIT is tried last, so its coding is still in place when it
    // wins; any other winner has to be coded again from the checkpoint.
    if best_partition != PartitionType::PARTITION_SPLIT {
      assert!(!rdo_output.part_modes.is_empty());
      cw.rollback(&cw_checkpoint);
      w_pre_cdef.rollback(&w_pre_checkpoint);
      w_post_cdef.rollback(&w_post_checkpoint);

      assert!(best_partition != PartitionType::PARTITION_NONE || !must_split);
      let subsize = bsize.subsize(best_partition).unwrap();

      if bsize >= BlockSize::BLOCK_8X8 {
        let w: &mut W =
          if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
        cw.write_partition(w, tile_bo, best_partition, bsize);
      }
      for mode in rdo_output.part_modes.clone() {
        assert!(subsize == mode.bsize);

        if !mode.pred_mode_luma.is_intra() {
          save_block_motion(
            ts,
            mode.bsize,
            mode.bo,
            mode.ref_frames[0].to_index(),
            mode.mvs[0],
          );
        }

        // Re-encode the block with the stored winning modes.
        encode_block_with_modes(
          fi,
          ts,
          cw,
          w_pre_cdef,
          w_post_cdef,
          mode.bsize,
          mode.bo,
          &mode,
          rdo_type,
          Some(enc_stats),
        );
      }
    }
  }

  assert!(best_partition != PartitionType::PARTITION_INVALID);

  // Update the partition context, except for a split of a block larger
  // than 8x8.
  if is_square
    && bsize >= BlockSize::BLOCK_8X8
    && (bsize == BlockSize::BLOCK_8X8
      || best_partition != PartitionType::PARTITION_SPLIT)
  {
    cw.bc.update_partition_context(
      tile_bo,
      bsize.subsize(best_partition).unwrap(),
      bsize,
    );
  }

  rdo_output.rd_cost = best_rd;
  rdo_output.part_type = best_partition;

  // Modes are only reported to the caller for PARTITION_NONE.
  if best_partition != PartitionType::PARTITION_NONE {
    rdo_output.part_modes.clear();
  }
  rdo_output
}
2916
/// Codes the block at `tile_bo`, deciding at each level whether to keep it
/// whole or split it, and recursing into the children of a split.
///
/// `block_output` optionally carries a decision made for this block by the
/// caller (its modes are reused instead of running a new mode decision).
/// When the block may but need not be split, `rdo_partition_decision`
/// chooses between `PARTITION_SPLIT` and `PARTITION_NONE`.
///
/// Does nothing if `tile_bo` lies outside the tile.
fn encode_partition_topdown<T: Pixel, W: Writer>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
  bsize: BlockSize, tile_bo: TileBlockOffset,
  block_output: &Option<PartitionGroupParameters>, inter_cfg: &InterConfig,
  enc_stats: &mut EncoderStats,
) {
  if tile_bo.0.x >= ts.mi_width || tile_bo.0.y >= ts.mi_height {
    return;
  }
  let is_square = bsize.is_sqr();
  let rdo_type = RDOType::PixelDistRealRate;
  // Half the block width, in mode-info units; used for both axes below.
  let hbs = bsize.width_mi() / 2;
  // Whether the second half of the block still starts inside the tile.
  let has_cols = tile_bo.0.x + hbs < ts.mi_width;
  let has_rows = tile_bo.0.y + hbs < ts.mi_height;

  debug_assert!(fi.partition_range.max <= BlockSize::BLOCK_64X64);

  // Square blocks above the allowed maximum, or whose second half falls
  // outside the tile, cannot be coded whole.
  let must_split =
    is_square && (bsize > fi.partition_range.max || !has_cols || !has_rows);

  // Blocks of 8x8 and below are never split in inter frames unless the
  // chroma sampling is 4:2:0.
  let can_split = if fi.frame_type.has_inter() &&
    fi.sequence.chroma_sampling != ChromaSampling::Cs420 &&
    bsize <= BlockSize::BLOCK_8X8 {
    false
  } else {
    (bsize > fi.partition_range.min && is_square) || must_split
  };

  // Start from the caller's decision if there is one.
  let mut rdo_output =
    block_output.clone().unwrap_or_else(|| PartitionGroupParameters {
      part_type: PartitionType::PARTITION_INVALID,
      rd_cost: f64::MAX,
      part_modes: ArrayVec::new(),
    });

  let partition = if must_split {
    PartitionType::PARTITION_SPLIT
  } else if can_split {
    debug_assert!(bsize.is_sqr());

    // Let RDO choose between splitting and coding the block whole.
    rdo_output = rdo_partition_decision(
      fi,
      ts,
      cw,
      w_pre_cdef,
      w_post_cdef,
      bsize,
      tile_bo,
      &rdo_output,
      &[PartitionType::PARTITION_SPLIT, PartitionType::PARTITION_NONE],
      rdo_type,
      inter_cfg,
    );
    rdo_output.part_type
  } else {
    // Blocks that cannot be split are coded directly.
    PartitionType::PARTITION_NONE
  };

  debug_assert!(partition != PartitionType::PARTITION_INVALID);

  let subsize = bsize.subsize(partition).unwrap();

  // Only square blocks of at least 8x8 write a partition symbol.
  if bsize >= BlockSize::BLOCK_8X8 && is_square {
    let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
    cw.write_partition(w, tile_bo, partition, bsize);
  }

  match partition {
    PartitionType::PARTITION_NONE => {
      let rdo_decision;
      let part_decision =
        if let Some(part_mode) = rdo_output.part_modes.first() {
          // A mode decision for this block is already available.
          part_mode
        } else {
          // No decision was provided for this block: make one now.
          rdo_decision =
            rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);
          &rdo_decision
        };

      let mut mode_luma = part_decision.pred_mode_luma;
      let mut mode_chroma = part_decision.pred_mode_chroma;

      let cfl = part_decision.pred_cfl_params;
      let skip = part_decision.skip;
      let ref_frames = part_decision.ref_frames;
      let mvs = part_decision.mvs;
      let mut cdef_coded = cw.bc.cdef_coded;

      // The segmentation index is set before the transform search and the
      // block coding below.
      cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, part_decision.sidx);

      // The transform size/type stored in the decision are not used here;
      // they are searched again.
      let (tx_size, tx_type) = rdo_tx_size_type(
        fi, ts, cw, bsize, tile_bo, mode_luma, ref_frames, mvs, skip,
      );

      let mut mv_stack = ArrayVec::<CandidateMV, 9>::new();
      let is_compound = ref_frames[1] != NONE_FRAME;
      let mode_context = cw.find_mvrefs(
        tile_bo,
        ref_frames,
        &mut mv_stack,
        bsize,
        fi,
        is_compound,
      );

      // Remap the inter mode against the candidate list just built, keeping
      // the motion vectors and changing only how they are signalled.
      if !mode_luma.is_intra() {
        if is_compound && mode_luma != PredictionMode::GLOBAL_GLOBALMV {
          // Compare both vectors against the first candidates.
          // NOTE(review): entries 0 and 1 are indexed without a length
          // check — presumably a compound stack always has at least two
          // entries; confirm.
          let match0 = mv_stack[0].this_mv.row == mvs[0].row
            && mv_stack[0].this_mv.col == mvs[0].col;
          let match1 = mv_stack[0].comp_mv.row == mvs[1].row
            && mv_stack[0].comp_mv.col == mvs[1].col;

          let match2 = mv_stack[1].this_mv.row == mvs[0].row
            && mv_stack[1].this_mv.col == mvs[0].col;
          let match3 = mv_stack[1].comp_mv.row == mvs[1].row
            && mv_stack[1].comp_mv.col == mvs[1].col;

          let match4 = mv_stack.len() > 2 && mv_stack[2].this_mv == mvs[0];
          let match5 = mv_stack.len() > 2 && mv_stack[2].comp_mv == mvs[1];

          let match6 = mv_stack.len() > 3 && mv_stack[3].this_mv == mvs[0];
          let match7 = mv_stack.len() > 3 && mv_stack[3].comp_mv == mvs[1];

          // Full-pair matches take priority over single-vector matches.
          mode_luma = if match0 && match1 {
            PredictionMode::NEAREST_NEARESTMV
          } else if match2 && match3 {
            PredictionMode::NEAR_NEAR0MV
          } else if match4 && match5 {
            PredictionMode::NEAR_NEAR1MV
          } else if match6 && match7 {
            PredictionMode::NEAR_NEAR2MV
          } else if match0 {
            PredictionMode::NEAREST_NEWMV
          } else if match1 {
            PredictionMode::NEW_NEARESTMV
          } else {
            PredictionMode::NEW_NEWMV
          };

          // Two zero vectors are signalled as GLOBAL_GLOBALMV unless the
          // nearest pair already matched.
          if mode_luma != PredictionMode::NEAREST_NEARESTMV
            && mvs[0].row == 0
            && mvs[0].col == 0
            && mvs[1].row == 0
            && mvs[1].col == 0
          {
            mode_luma = PredictionMode::GLOBAL_GLOBALMV;
          }
          mode_chroma = mode_luma;
        } else if !is_compound && mode_luma != PredictionMode::GLOBALMV {
          // Default to NEWMV; the last matching candidate among the first
          // four overrides it.
          mode_luma = PredictionMode::NEWMV;
          for (c, m) in mv_stack.iter().take(4).zip(
            [
              PredictionMode::NEARESTMV,
              PredictionMode::NEAR0MV,
              PredictionMode::NEAR1MV,
              PredictionMode::NEAR2MV,
            ]
            .iter(),
          ) {
            if c.this_mv.row == mvs[0].row && c.this_mv.col == mvs[0].col {
              mode_luma = *m;
            }
          }
          // A zero vector that matched no candidate: the replacement mode
          // depends on how many candidates exist.
          if mode_luma == PredictionMode::NEWMV
            && mvs[0].row == 0
            && mvs[0].col == 0
          {
            mode_luma = if mv_stack.is_empty() {
              PredictionMode::NEARESTMV
            } else if mv_stack.len() == 1 {
              PredictionMode::NEAR0MV
            } else {
              PredictionMode::GLOBALMV
            };
          }
          mode_chroma = mode_luma;
        }

        // Record the motion of the first reference for this block.
        save_block_motion(
          ts,
          part_decision.bsize,
          part_decision.bo,
          part_decision.ref_frames[0].to_index(),
          part_decision.mvs[0],
        );
      }

      // Symbols go to the post-CDEF writer once the CDEF flag is set.
      cdef_coded = encode_block_pre_cdef(
        &fi.sequence,
        ts,
        cw,
        if cdef_coded { w_post_cdef } else { w_pre_cdef },
        bsize,
        tile_bo,
        skip,
      );
      encode_block_post_cdef(
        fi,
        ts,
        cw,
        if cdef_coded { w_post_cdef } else { w_pre_cdef },
        mode_luma,
        mode_chroma,
        part_decision.angle_delta,
        ref_frames,
        mvs,
        bsize,
        tile_bo,
        skip,
        cfl,
        tx_size,
        tx_type,
        mode_context,
        &mv_stack,
        RDOType::PixelDistRealRate,
        true,
        Some(enc_stats),
      );
    }
    PARTITION_SPLIT | PARTITION_HORZ | PARTITION_VERT => {
      if !rdo_output.part_modes.is_empty() {
        debug_assert!(can_split && !must_split);

        // Modes for each child came out of the partition decision; hand
        // each child its own mode as a PARTITION_NONE starting point.
        for mode in rdo_output.part_modes {
          // Each child goes through its own splitting decision.
          encode_partition_topdown(
            fi,
            ts,
            cw,
            w_pre_cdef,
            w_post_cdef,
            subsize,
            mode.bo,
            &Some(PartitionGroupParameters {
              rd_cost: mode.rd_cost,
              part_type: PartitionType::PARTITION_NONE,
              part_modes: [mode][..].try_into().unwrap(),
            }),
            inter_cfg,
            enc_stats,
          );
        }
      } else {
        debug_assert!(must_split);
        // Width and height of a child block, in mode-info units.
        let hbsw = subsize.width_mi();
        let hbsh = subsize.height_mi();
        // Offsets of the four quadrants; `get_sub_partitions` selects the
        // ones that apply to `partition`.
        let four_partitions = [
          tile_bo,
          TileBlockOffset(BlockOffset {
            x: tile_bo.0.x + hbsw,
            y: tile_bo.0.y,
          }),
          TileBlockOffset(BlockOffset {
            x: tile_bo.0.x,
            y: tile_bo.0.y + hbsh,
          }),
          TileBlockOffset(BlockOffset {
            x: tile_bo.0.x + hbsw,
            y: tile_bo.0.y + hbsh,
          }),
        ];
        let partitions = get_sub_partitions(&four_partitions, partition);

        // No decisions are available for the children of a forced split.
        partitions.iter().for_each(|&offset| {
          encode_partition_topdown(
            fi,
            ts,
            cw,
            w_pre_cdef,
            w_post_cdef,
            subsize,
            offset,
            &None,
            inter_cfg,
            enc_stats,
          );
        });
      }
    }
    _ => unreachable!(),
  }

  // Update the partition context, except for a split of a block larger
  // than 8x8.
  if is_square
    && bsize >= BlockSize::BLOCK_8X8
    && (bsize == BlockSize::BLOCK_8X8
      || partition != PartitionType::PARTITION_SPLIT)
  {
    cw.bc.update_partition_context(tile_bo, subsize, bsize);
  }
}
3222
3223fn get_initial_cdfcontext<T: Pixel>(fi: &FrameInvariants<T>) -> CDFContext {
3224 let cdf = if fi.primary_ref_frame == PRIMARY_REF_NONE {
3225 None
3226 } else {
3227 let ref_frame_idx = fi.ref_frames[fi.primary_ref_frame as usize] as usize;
3228 let ref_frame = fi.rec_buffer.frames[ref_frame_idx].as_ref();
3229 ref_frame.map(|rec| rec.cdfs)
3230 };
3231
3232 cdf.unwrap_or_else(|| CDFContext::new(fi.base_q_idx))
3234}
3235
3236#[profiling::function]
3237fn encode_tile_group<T: Pixel>(
3238 fi: &FrameInvariants<T>, fs: &mut FrameState<T>, inter_cfg: &InterConfig,
3239) -> Vec<u8> {
3240 let planes =
3241 if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 };
3242 let mut blocks = FrameBlocks::new(fi.w_in_b, fi.h_in_b);
3243 let ti = &fi.sequence.tiling;
3244
3245 let initial_cdf = get_initial_cdfcontext(fi);
3246 let mut cdfs = vec![initial_cdf; ti.tile_count()];
3248
3249 let (raw_tiles, stats): (Vec<_>, Vec<_>) = ti
3250 .tile_iter_mut(fs, &mut blocks)
3251 .zip(cdfs.iter_mut())
3252 .collect::<Vec<_>>()
3253 .into_par_iter()
3254 .map(|(mut ctx, cdf)| {
3255 encode_tile(fi, &mut ctx.ts, cdf, &mut ctx.tb, inter_cfg)
3256 })
3257 .unzip();
3258
3259 for tile_stats in stats {
3260 fs.enc_stats += &tile_stats;
3261 }
3262
3263 let levels = fs.apply_tile_state_mut(|ts| {
3268 let rec = &mut ts.rec;
3269 deblock_filter_optimize(
3270 fi,
3271 &rec.as_const(),
3272 &ts.input.as_tile(),
3273 &blocks.as_tile_blocks(),
3274 fi.width,
3275 fi.height,
3276 )
3277 });
3278 fs.deblock.levels = levels;
3279
3280 if fs.deblock.levels[0] != 0 || fs.deblock.levels[1] != 0 {
3281 fs.apply_tile_state_mut(|ts| {
3282 let rec = &mut ts.rec;
3283 deblock_filter_frame(
3284 ts.deblock,
3285 rec,
3286 &blocks.as_tile_blocks(),
3287 fi.width,
3288 fi.height,
3289 fi.sequence.bit_depth,
3290 planes,
3291 );
3292 });
3293 }
3294
3295 if fi.sequence.enable_restoration {
3296 let deblocked_frame = (*fs.rec).clone();
3299
3300 if fi.sequence.enable_cdef {
3302 fs.apply_tile_state_mut(|ts| {
3303 let rec = &mut ts.rec;
3304 cdef_filter_tile(fi, &deblocked_frame, &blocks.as_tile_blocks(), rec);
3305 });
3306 }
3307 fs.restoration.lrf_filter_frame(
3309 Arc::get_mut(&mut fs.rec).unwrap(),
3310 &deblocked_frame,
3311 fi,
3312 );
3313 } else {
3314 if fi.sequence.enable_cdef {
3316 let deblocked_frame = (*fs.rec).clone();
3317 fs.apply_tile_state_mut(|ts| {
3318 let rec = &mut ts.rec;
3319 cdef_filter_tile(fi, &deblocked_frame, &blocks.as_tile_blocks(), rec);
3320 });
3321 }
3322 }
3323
3324 let (idx_max, max_len) = raw_tiles
3325 .iter()
3326 .map(Vec::len)
3327 .enumerate()
3328 .max_by_key(|&(_, len)| len)
3329 .unwrap();
3330
3331 if !fi.disable_frame_end_update_cdf {
3332 fs.context_update_tile_id = idx_max;
3334 fs.cdfs = cdfs[idx_max];
3335 fs.cdfs.reset_counts();
3336 }
3337
3338 let max_tile_size_bytes = ILog::ilog(max_len).div_ceil(8) as u32;
3339 debug_assert!(max_tile_size_bytes > 0 && max_tile_size_bytes <= 4);
3340 fs.max_tile_size_bytes = max_tile_size_bytes;
3341
3342 build_raw_tile_group(ti, &raw_tiles, max_tile_size_bytes)
3343}
3344
3345fn build_raw_tile_group(
3346 ti: &TilingInfo, raw_tiles: &[Vec<u8>], max_tile_size_bytes: u32,
3347) -> Vec<u8> {
3348 let mut raw = Vec::new();
3350 let mut bw = BitWriter::endian(&mut raw, BigEndian);
3351 if ti.cols * ti.rows > 1 {
3352 bw.write_bit(false).unwrap();
3354 }
3355 bw.byte_align().unwrap();
3356 for (i, raw_tile) in raw_tiles.iter().enumerate() {
3357 let last = raw_tiles.len() - 1;
3358 if i != last {
3359 let tile_size_minus_1 = raw_tile.len() - 1;
3360 bw.write_le(max_tile_size_bytes, tile_size_minus_1 as u64).unwrap();
3361 }
3362 bw.write_bytes(raw_tile).unwrap();
3363 }
3364 raw
3365}
3366
/// A superblock whose recorded symbols are waiting to be written to the
/// tile's final bitstream.
pub struct SBSQueueEntry {
  /// Tile-relative offset of the superblock.
  pub sbo: TileSuperBlockOffset,
  /// Per-plane index of the loop restoration unit this superblock was
  /// assigned, or `-1` when none was found for the plane.
  pub lru_index: [i32; MAX_PLANES],
  /// Whether a CDEF index has to be written for this superblock.
  pub cdef_coded: bool,
  /// Recorded symbols replayed before the CDEF index.
  pub w_pre_cdef: WriterBase<WriterRecorder>,
  /// Recorded symbols replayed after the CDEF index (only when
  /// `cdef_coded` is set).
  pub w_post_cdef: WriterBase<WriterRecorder>,
}
3374
3375#[profiling::function]
3376fn check_lf_queue<T: Pixel>(
3377 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
3378 cw: &mut ContextWriter, w: &mut WriterBase<WriterEncoder>,
3379 sbs_q: &mut VecDeque<SBSQueueEntry>, last_lru_ready: &mut [i32; 3],
3380 last_lru_rdoed: &mut [i32; 3], last_lru_coded: &mut [i32; 3],
3381 deblock_p: bool,
3382) {
3383 let mut check_queue = true;
3384 let planes = if fi.sequence.chroma_sampling == ChromaSampling::Cs400 {
3385 1
3386 } else {
3387 MAX_PLANES
3388 };
3389
3390 while check_queue {
3392 if let Some(qe) = sbs_q.front_mut() {
3393 for pli in 0..planes {
3394 if qe.lru_index[pli] > last_lru_ready[pli] {
3395 check_queue = false;
3396 break;
3397 }
3398 }
3399 if check_queue {
3400 if qe.cdef_coded || fi.sequence.enable_restoration {
3402 let mut already_rdoed = false;
3419 for pli in 0..planes {
3420 if qe.lru_index[pli] != -1
3421 && qe.lru_index[pli] <= last_lru_rdoed[pli]
3422 {
3423 already_rdoed = true;
3424 break;
3425 }
3426 }
3427 if !already_rdoed {
3428 rdo_loop_decision(qe.sbo, fi, ts, cw, w, deblock_p);
3429 for pli in 0..planes {
3430 if qe.lru_index[pli] != -1
3431 && last_lru_rdoed[pli] < qe.lru_index[pli]
3432 {
3433 last_lru_rdoed[pli] = qe.lru_index[pli];
3434 }
3435 }
3436 }
3437 }
3438 if !fi.allow_intrabc && fi.sequence.enable_restoration {
3440 for pli in 0..planes {
3442 if qe.lru_index[pli] != -1
3443 && last_lru_coded[pli] < qe.lru_index[pli]
3444 {
3445 last_lru_coded[pli] = qe.lru_index[pli];
3446 cw.write_lrf(w, &mut ts.restoration, qe.sbo, pli);
3447 }
3448 }
3449 }
3450 qe.w_pre_cdef.replay(w);
3452 if qe.cdef_coded {
3454 let cdef_index = cw.bc.blocks.get_cdef(qe.sbo);
3455 cw.write_cdef(w, cdef_index, fi.cdef_bits);
3456 qe.w_post_cdef.replay(w);
3458 }
3459 sbs_q.pop_front();
3460 }
3461 } else {
3462 check_queue = false;
3463 }
3464 }
3465}
3466
3467#[profiling::function]
3468fn encode_tile<'a, T: Pixel>(
3469 fi: &FrameInvariants<T>, ts: &'a mut TileStateMut<'_, T>,
3470 fc: &'a mut CDFContext, blocks: &'a mut TileBlocksMut<'a>,
3471 inter_cfg: &InterConfig,
3472) -> (Vec<u8>, EncoderStats) {
3473 let mut enc_stats = EncoderStats::default();
3474 let mut w = WriterEncoder::new();
3475 let planes =
3476 if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 };
3477
3478 let bc = BlockContext::new(blocks);
3479 let mut cw = ContextWriter::new(fc, bc);
3480 let mut sbs_q: VecDeque<SBSQueueEntry> = VecDeque::new();
3481 let mut last_lru_ready = [-1; 3];
3482 let mut last_lru_rdoed = [-1; 3];
3483 let mut last_lru_coded = [-1; 3];
3484
3485 for sby in 0..ts.sb_height {
3487 cw.bc.reset_left_contexts(planes);
3488
3489 for sbx in 0..ts.sb_width {
3490 cw.fc_log.clear();
3491
3492 let tile_sbo = TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby });
3493 let mut sbs_qe = SBSQueueEntry {
3494 sbo: tile_sbo,
3495 lru_index: [-1; MAX_PLANES],
3496 cdef_coded: false,
3497 w_pre_cdef: WriterRecorder::new(),
3498 w_post_cdef: WriterRecorder::new(),
3499 };
3500
3501 let tile_bo = tile_sbo.block_offset(0, 0);
3502 cw.bc.cdef_coded = false;
3503 cw.bc.code_deltas = fi.delta_q_present;
3504
3505 let is_straddle_sbx =
3506 tile_bo.0.x + BlockSize::BLOCK_64X64.width_mi() > ts.mi_width;
3507 let is_straddle_sby =
3508 tile_bo.0.y + BlockSize::BLOCK_64X64.height_mi() > ts.mi_height;
3509
3510 if fi.config.speed_settings.partition.encode_bottomup
3512 || is_straddle_sbx
3513 || is_straddle_sby
3514 {
3515 encode_partition_bottomup(
3516 fi,
3517 ts,
3518 &mut cw,
3519 &mut sbs_qe.w_pre_cdef,
3520 &mut sbs_qe.w_post_cdef,
3521 BlockSize::BLOCK_64X64,
3522 tile_bo,
3523 f64::MAX,
3524 inter_cfg,
3525 &mut enc_stats,
3526 );
3527 } else {
3528 encode_partition_topdown(
3529 fi,
3530 ts,
3531 &mut cw,
3532 &mut sbs_qe.w_pre_cdef,
3533 &mut sbs_qe.w_post_cdef,
3534 BlockSize::BLOCK_64X64,
3535 tile_bo,
3536 &None,
3537 inter_cfg,
3538 &mut enc_stats,
3539 );
3540 }
3541
3542 {
3543 let mut check_queue = false;
3544 sbs_qe.cdef_coded = cw.bc.cdef_coded;
3546 for pli in 0..planes {
3547 if let Some((lru_x, lru_y)) =
3548 ts.restoration.planes[pli].restoration_unit_index(tile_sbo, false)
3549 {
3550 let lru_index = ts.restoration.planes[pli]
3551 .restoration_unit_countable(lru_x, lru_y)
3552 as i32;
3553 sbs_qe.lru_index[pli] = lru_index;
3554 if ts.restoration.planes[pli]
3555 .restoration_unit_last_sb_for_rdo(fi, ts.sbo, tile_sbo)
3556 {
3557 last_lru_ready[pli] = lru_index;
3558 check_queue = true;
3559 }
3560 } else {
3561 sbs_qe.lru_index[pli] = -1;
3564 check_queue = true;
3565 }
3566 }
3567 sbs_q.push_back(sbs_qe);
3568
3569 if check_queue && !fi.sequence.enable_delayed_loopfilter_rdo {
3570 check_lf_queue(
3571 fi,
3572 ts,
3573 &mut cw,
3574 &mut w,
3575 &mut sbs_q,
3576 &mut last_lru_ready,
3577 &mut last_lru_rdoed,
3578 &mut last_lru_coded,
3579 true,
3580 );
3581 }
3582 }
3583 }
3584 }
3585
3586 if fi.sequence.enable_delayed_loopfilter_rdo {
3587 let deblock_levels = deblock_filter_optimize(
3590 fi,
3591 &ts.rec.as_const(),
3592 &ts.input_tile,
3593 &cw.bc.blocks.as_const(),
3594 fi.width,
3595 fi.height,
3596 );
3597
3598 if deblock_levels[0] != 0 || deblock_levels[1] != 0 {
3599 let rec_copy = if planes == 3 {
3601 vec![
3602 ts.rec.planes[0].scratch_copy(),
3603 ts.rec.planes[1].scratch_copy(),
3604 ts.rec.planes[2].scratch_copy(),
3605 ]
3606 } else {
3607 vec![ts.rec.planes[0].scratch_copy()]
3608 };
3609
3610 let mut deblock_copy = *ts.deblock;
3612 deblock_copy.levels = deblock_levels;
3613
3614 deblock_filter_frame(
3616 &deblock_copy,
3617 &mut ts.rec,
3618 &cw.bc.blocks.as_const(),
3619 fi.width,
3620 fi.height,
3621 fi.sequence.bit_depth,
3622 planes,
3623 );
3624
3625 check_lf_queue(
3627 fi,
3628 ts,
3629 &mut cw,
3630 &mut w,
3631 &mut sbs_q,
3632 &mut last_lru_ready,
3633 &mut last_lru_rdoed,
3634 &mut last_lru_coded,
3635 false,
3636 );
3637
3638 for pli in 0..planes {
3640 let dst = &mut ts.rec.planes[pli];
3641 let src = &rec_copy[pli];
3642 for (dst_row, src_row) in dst.rows_iter_mut().zip(src.rows_iter()) {
3643 for (out, input) in dst_row.iter_mut().zip(src_row) {
3644 *out = *input;
3645 }
3646 }
3647 }
3648 } else {
3649 check_lf_queue(
3651 fi,
3652 ts,
3653 &mut cw,
3654 &mut w,
3655 &mut sbs_q,
3656 &mut last_lru_ready,
3657 &mut last_lru_rdoed,
3658 &mut last_lru_coded,
3659 false,
3660 );
3661 }
3662 }
3663
3664 assert!(
3665 sbs_q.is_empty(),
3666 "Superblock queue not empty in tile at offset {}:{}",
3667 ts.sbo.0.x,
3668 ts.sbo.0.y
3669 );
3670 (w.done(), enc_stats)
3671}
3672
3673#[allow(unused)]
3674fn write_tile_group_header(tile_start_and_end_present_flag: bool) -> Vec<u8> {
3675 let mut buf = Vec::new();
3676 {
3677 let mut bw = BitWriter::endian(&mut buf, BigEndian);
3678 bw.write_bit(tile_start_and_end_present_flag).unwrap();
3679 bw.byte_align().unwrap();
3680 }
3681 buf
3682}
3683
3684#[profiling::function]
3693pub fn encode_show_existing_frame<T: Pixel>(
3694 fi: &FrameInvariants<T>, fs: &mut FrameState<T>, inter_cfg: &InterConfig,
3695) -> Vec<u8> {
3696 debug_assert!(fi.is_show_existing_frame());
3697 let obu_extension = 0;
3698
3699 let mut packet = Vec::new();
3700
3701 if fi.frame_type == FrameType::KEY {
3702 write_key_frame_obus(&mut packet, fi, obu_extension).unwrap();
3703 }
3704
3705 for t35 in fi.t35_metadata.iter() {
3706 let mut t35_buf = Vec::new();
3707 let mut t35_bw = BitWriter::endian(&mut t35_buf, BigEndian);
3708 t35_bw.write_t35_metadata_obu(t35).unwrap();
3709 packet.write_all(&t35_buf).unwrap();
3710 t35_buf.clear();
3711 }
3712
3713 let mut buf1 = Vec::new();
3714 let mut buf2 = Vec::new();
3715 {
3716 let mut bw2 = BitWriter::endian(&mut buf2, BigEndian);
3717 bw2.write_frame_header_obu(fi, fs, inter_cfg).unwrap();
3718 }
3719
3720 {
3721 let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
3722 bw1.write_obu_header(ObuType::OBU_FRAME_HEADER, obu_extension).unwrap();
3723 }
3724 packet.write_all(&buf1).unwrap();
3725 buf1.clear();
3726
3727 {
3728 let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
3729 bw1.write_uleb128(buf2.len() as u64).unwrap();
3730 }
3731 packet.write_all(&buf1).unwrap();
3732 buf1.clear();
3733
3734 packet.write_all(&buf2).unwrap();
3735 buf2.clear();
3736
3737 let map_idx = fi.frame_to_show_map_idx as usize;
3738 if let Some(ref rec) = fi.rec_buffer.frames[map_idx] {
3739 let fs_rec = Arc::get_mut(&mut fs.rec).unwrap();
3740 let planes =
3741 if fi.sequence.chroma_sampling == ChromaSampling::Cs400 { 1 } else { 3 };
3742 for p in 0..planes {
3743 fs_rec.planes[p].data.copy_from_slice(&rec.frame.planes[p].data);
3744 }
3745 }
3746 packet
3747}
3748
3749fn get_initial_segmentation<T: Pixel>(
3750 fi: &FrameInvariants<T>,
3751) -> SegmentationState {
3752 let segmentation = if fi.primary_ref_frame == PRIMARY_REF_NONE {
3753 None
3754 } else {
3755 let ref_frame_idx = fi.ref_frames[fi.primary_ref_frame as usize] as usize;
3756 let ref_frame = fi.rec_buffer.frames[ref_frame_idx].as_ref();
3757 ref_frame.map(|rec| rec.segmentation)
3758 };
3759
3760 segmentation.unwrap_or_default()
3762}
3763
3764#[profiling::function]
3768pub fn encode_frame<T: Pixel>(
3769 fi: &FrameInvariants<T>, fs: &mut FrameState<T>, inter_cfg: &InterConfig,
3770) -> Vec<u8> {
3771 debug_assert!(!fi.is_show_existing_frame());
3772 let obu_extension = 0;
3773
3774 let mut packet = Vec::new();
3775
3776 if fi.enable_segmentation {
3777 fs.segmentation = get_initial_segmentation(fi);
3778 segmentation_optimize(fi, fs);
3779 }
3780 let tile_group = encode_tile_group(fi, fs, inter_cfg);
3781
3782 if fi.frame_type == FrameType::KEY {
3783 write_key_frame_obus(&mut packet, fi, obu_extension).unwrap();
3784 }
3785
3786 for t35 in fi.t35_metadata.iter() {
3787 let mut t35_buf = Vec::new();
3788 let mut t35_bw = BitWriter::endian(&mut t35_buf, BigEndian);
3789 t35_bw.write_t35_metadata_obu(t35).unwrap();
3790 packet.write_all(&t35_buf).unwrap();
3791 t35_buf.clear();
3792 }
3793
3794 let mut buf1 = Vec::new();
3795 let mut buf2 = Vec::new();
3796 {
3797 let mut bw2 = BitWriter::endian(&mut buf2, BigEndian);
3798 bw2.write_frame_header_obu(fi, fs, inter_cfg).unwrap();
3799 }
3800
3801 {
3802 let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
3803 bw1.write_obu_header(ObuType::OBU_FRAME, obu_extension).unwrap();
3804 }
3805 packet.write_all(&buf1).unwrap();
3806 buf1.clear();
3807
3808 {
3809 let mut bw1 = BitWriter::endian(&mut buf1, BigEndian);
3810 bw1.write_uleb128((buf2.len() + tile_group.len()) as u64).unwrap();
3811 }
3812 packet.write_all(&buf1).unwrap();
3813 buf1.clear();
3814
3815 packet.write_all(&buf2).unwrap();
3816 buf2.clear();
3817
3818 packet.write_all(&tile_group).unwrap();
3819 packet
3820}
3821
3822pub fn update_rec_buffer<T: Pixel>(
3823 output_frameno: u64, fi: &mut FrameInvariants<T>, fs: &FrameState<T>,
3824) {
3825 let rfs = Arc::new(ReferenceFrame {
3826 order_hint: fi.order_hint,
3827 width: fi.width as u32,
3828 height: fi.height as u32,
3829 render_width: fi.render_width,
3830 render_height: fi.render_height,
3831 frame: fs.rec.clone(),
3832 input_hres: fs.input_hres.clone(),
3833 input_qres: fs.input_qres.clone(),
3834 cdfs: fs.cdfs,
3835 frame_me_stats: fs.frame_me_stats.clone(),
3836 output_frameno,
3837 segmentation: fs.segmentation,
3838 });
3839 for i in 0..REF_FRAMES {
3840 if (fi.refresh_frame_flags & (1 << i)) != 0 {
3841 fi.rec_buffer.frames[i] = Some(Arc::clone(&rfs));
3842 fi.rec_buffer.deblock[i] = fs.deblock;
3843 }
3844 }
3845}
3846
#[cfg(test)]
mod test {
  use super::*;

  /// `PARTITION_SPLIT` must be the final entry of `RAV1E_PARTITION_TYPES`.
  #[test]
  fn check_partition_types_order() {
    let final_entry = RAV1E_PARTITION_TYPES.last();
    assert_eq!(final_entry, Some(&PartitionType::PARTITION_SPLIT));
  }
}