1use crate::{DecodedFrame, NalUnit, NalUnitType, VideoCodec, VideoDecoder, VideoError};
30
31pub struct BitstreamReader<'a> {
37 pub(crate) data: &'a [u8],
38 pub(crate) byte_offset: usize,
39 pub(crate) bit_offset: u8, }
41
42impl<'a> BitstreamReader<'a> {
43 pub fn new(data: &'a [u8]) -> Self {
44 Self {
45 data,
46 byte_offset: 0,
47 bit_offset: 0,
48 }
49 }
50
51 pub fn bits_remaining(&self) -> usize {
53 if self.byte_offset >= self.data.len() {
54 return 0;
55 }
56 (self.data.len() - self.byte_offset) * 8 - self.bit_offset as usize
57 }
58
59 pub fn read_bit(&mut self) -> Result<u8, VideoError> {
61 if self.byte_offset >= self.data.len() {
62 return Err(VideoError::Codec("bitstream exhausted".into()));
63 }
64 let bit = (self.data[self.byte_offset] >> (7 - self.bit_offset)) & 1;
65 self.bit_offset += 1;
66 if self.bit_offset == 8 {
67 self.bit_offset = 0;
68 self.byte_offset += 1;
69 }
70 Ok(bit)
71 }
72
73 pub fn read_bits(&mut self, n: u8) -> Result<u32, VideoError> {
75 if n > 32 {
76 return Err(VideoError::Codec(format!(
77 "read_bits: requested {n} bits, max is 32"
78 )));
79 }
80 let mut value = 0u32;
81 for _ in 0..n {
82 value = (value << 1) | self.read_bit()? as u32;
83 }
84 Ok(value)
85 }
86
87 pub fn read_ue(&mut self) -> Result<u32, VideoError> {
89 let mut leading_zeros = 0u32;
90 while self.read_bit()? == 0 {
91 leading_zeros += 1;
92 if leading_zeros > 31 {
93 return Err(VideoError::Codec("exp-golomb overflow".into()));
94 }
95 }
96 if leading_zeros == 0 {
97 return Ok(0);
98 }
99 let suffix = self.read_bits(leading_zeros as u8)?;
100 Ok((1 << leading_zeros) - 1 + suffix)
101 }
102
103 pub fn read_se(&mut self) -> Result<i32, VideoError> {
105 let code = self.read_ue()?;
106 let value = code.div_ceil(2) as i32;
107 if code % 2 == 0 { Ok(-value) } else { Ok(value) }
108 }
109
110 pub fn skip_bits(&mut self, n: usize) -> Result<(), VideoError> {
112 for _ in 0..n {
113 self.read_bit()?;
114 }
115 Ok(())
116 }
117}
118
119#[derive(Debug, Clone)]
125pub struct Sps {
126 pub profile_idc: u8,
127 pub level_idc: u8,
128 pub sps_id: u32,
129 pub chroma_format_idc: u32,
130 pub bit_depth_luma: u32,
131 pub bit_depth_chroma: u32,
132 pub log2_max_frame_num: u32,
133 pub pic_order_cnt_type: u32,
134 pub max_num_ref_frames: u32,
135 pub pic_width_in_mbs: u32,
136 pub pic_height_in_map_units: u32,
137 pub frame_mbs_only_flag: bool,
138 pub mb_adaptive_frame_field_flag: bool,
139 pub frame_crop_left: u32,
140 pub frame_crop_right: u32,
141 pub frame_crop_top: u32,
142 pub frame_crop_bottom: u32,
143}
144
145impl Sps {
146 pub fn width(&self) -> usize {
148 (self.pic_width_in_mbs * 16) as usize
149 }
150
151 pub fn height(&self) -> usize {
153 let mbs_height = if self.frame_mbs_only_flag {
154 self.pic_height_in_map_units
155 } else {
156 self.pic_height_in_map_units * 2
157 };
158 (mbs_height * 16) as usize
159 }
160
161 pub fn cropped_width(&self) -> usize {
165 let sub_width_c = if self.chroma_format_idc == 1 { 2 } else { 1 };
166 let crop = (self.frame_crop_left + self.frame_crop_right) as usize * sub_width_c;
167 self.width().saturating_sub(crop).max(1)
168 }
169
170 pub fn cropped_height(&self) -> usize {
174 let sub_height_c = if self.chroma_format_idc == 1 { 2 } else { 1 };
175 let factor = if self.frame_mbs_only_flag { 1 } else { 2 };
176 let crop = (self.frame_crop_top + self.frame_crop_bottom) as usize * sub_height_c * factor;
177 self.height().saturating_sub(crop).max(1)
178 }
179}
180
181pub fn parse_sps(nal_data: &[u8]) -> Result<Sps, VideoError> {
183 if nal_data.is_empty() {
184 return Err(VideoError::Codec("empty SPS data".into()));
185 }
186
187 let rbsp = remove_emulation_prevention(nal_data);
189 let mut r = BitstreamReader::new(&rbsp);
190
191 let profile_idc = r.read_bits(8)? as u8;
192 let _constraint_flags = r.read_bits(8)?; let level_idc = r.read_bits(8)? as u8;
194 let sps_id = r.read_ue()?;
195
196 let mut chroma_format_idc = 1u32;
197 let mut bit_depth_luma = 8u32;
198 let mut bit_depth_chroma = 8u32;
199
200 if profile_idc == 100
202 || profile_idc == 110
203 || profile_idc == 122
204 || profile_idc == 244
205 || profile_idc == 44
206 || profile_idc == 83
207 || profile_idc == 86
208 || profile_idc == 118
209 || profile_idc == 128
210 {
211 chroma_format_idc = r.read_ue()?;
212 if chroma_format_idc == 3 {
213 let _separate_colour_plane_flag = r.read_bit()?;
214 }
215 bit_depth_luma = r.read_ue()? + 8;
216 bit_depth_chroma = r.read_ue()? + 8;
217 let _qpprime_y_zero_transform_bypass = r.read_bit()?;
218 let seq_scaling_matrix_present = r.read_bit()?;
219 if seq_scaling_matrix_present == 1 {
220 let count = if chroma_format_idc != 3 { 8 } else { 12 };
221 for _ in 0..count {
222 let present = r.read_bit()?;
223 if present == 1 {
224 let size = if count <= 6 { 16 } else { 64 };
225 skip_scaling_list(&mut r, size)?;
226 }
227 }
228 }
229 }
230
231 let log2_max_frame_num = r.read_ue()? + 4;
232 let pic_order_cnt_type = r.read_ue()?;
233
234 if pic_order_cnt_type == 0 {
235 let _log2_max_pic_order_cnt_lsb = r.read_ue()?;
236 } else if pic_order_cnt_type == 1 {
237 let _delta_pic_order_always_zero_flag = r.read_bit()?;
238 let _offset_for_non_ref_pic = r.read_se()?;
239 let _offset_for_top_to_bottom = r.read_se()?;
240 let num_ref_frames_in_poc = r.read_ue()?;
241 if num_ref_frames_in_poc > 255 {
242 return Err(VideoError::Codec(format!(
243 "SPS num_ref_frames_in_pic_order_cnt_cycle too large: {num_ref_frames_in_poc}"
244 )));
245 }
246 for _ in 0..num_ref_frames_in_poc {
247 let _offset = r.read_se()?;
248 }
249 }
250
251 let max_num_ref_frames = r.read_ue()?;
252 let _gaps_in_frame_num_allowed = r.read_bit()?;
253 let pic_width_in_mbs = r.read_ue()? + 1;
254 let pic_height_in_map_units = r.read_ue()? + 1;
255 let frame_mbs_only_flag = r.read_bit()? == 1;
256
257 let mb_adaptive_frame_field_flag = if !frame_mbs_only_flag {
258 r.read_bit()? == 1
259 } else {
260 false
261 };
262
263 let _direct_8x8_inference = r.read_bit()?;
264
265 let mut frame_crop_left = 0u32;
266 let mut frame_crop_right = 0u32;
267 let mut frame_crop_top = 0u32;
268 let mut frame_crop_bottom = 0u32;
269
270 let frame_cropping_flag = r.read_bit()?;
271 if frame_cropping_flag == 1 {
272 frame_crop_left = r.read_ue()?;
273 frame_crop_right = r.read_ue()?;
274 frame_crop_top = r.read_ue()?;
275 frame_crop_bottom = r.read_ue()?;
276 }
277
278 Ok(Sps {
279 profile_idc,
280 level_idc,
281 sps_id,
282 chroma_format_idc,
283 bit_depth_luma,
284 bit_depth_chroma,
285 log2_max_frame_num,
286 pic_order_cnt_type,
287 max_num_ref_frames,
288 pic_width_in_mbs,
289 pic_height_in_map_units,
290 frame_mbs_only_flag,
291 mb_adaptive_frame_field_flag,
292 frame_crop_left,
293 frame_crop_right,
294 frame_crop_top,
295 frame_crop_bottom,
296 })
297}
298
299fn skip_scaling_list(r: &mut BitstreamReader<'_>, size: usize) -> Result<(), VideoError> {
300 let mut last_scale = 8i32;
301 let mut next_scale = 8i32;
302 for _ in 0..size {
303 if next_scale != 0 {
304 let delta = r.read_se()?;
305 next_scale = (last_scale + delta + 256) % 256;
306 }
307 last_scale = if next_scale == 0 {
308 last_scale
309 } else {
310 next_scale
311 };
312 }
313 Ok(())
314}
315
316fn remove_emulation_prevention(data: &[u8]) -> Vec<u8> {
318 let mut result = Vec::with_capacity(data.len());
319 let mut i = 0;
320 while i < data.len() {
321 if i + 2 < data.len() && data[i] == 0x00 && data[i + 1] == 0x00 && data[i + 2] == 0x03 {
322 result.push(0x00);
323 result.push(0x00);
324 i += 3; } else {
326 result.push(data[i]);
327 i += 1;
328 }
329 }
330 result
331}
332
333#[derive(Debug, Clone)]
339pub struct Pps {
340 pub pps_id: u32,
341 pub sps_id: u32,
342 pub entropy_coding_mode_flag: bool,
343 pub num_slice_groups: u32,
344 pub slice_group_map_type: u32,
345 pub run_length_minus1: Vec<u32>,
347 pub top_left: Vec<u32>,
349 pub bottom_right: Vec<u32>,
351 pub slice_group_change_direction_flag: bool,
353 pub slice_group_change_rate: u32,
355 pub slice_group_id: Vec<u32>,
357 pub num_ref_idx_l0_default_active: u32,
358 pub num_ref_idx_l1_default_active: u32,
359 pub pic_init_qp: i32,
360}
361
362pub fn parse_pps(nal_data: &[u8]) -> Result<Pps, VideoError> {
364 if nal_data.is_empty() {
365 return Err(VideoError::Codec("empty PPS data".into()));
366 }
367 let rbsp = remove_emulation_prevention(nal_data);
368 let mut r = BitstreamReader::new(&rbsp);
369
370 let pps_id = r.read_ue()?;
371 let sps_id = r.read_ue()?;
372 let entropy_coding_mode_flag = r.read_bit()? == 1;
373 let _bottom_field_pic_order = r.read_bit()?;
374 let num_slice_groups = r.read_ue()? + 1;
375
376 let mut slice_group_map_type = 0u32;
377 let mut run_length_minus1 = Vec::new();
378 let mut top_left = Vec::new();
379 let mut bottom_right = Vec::new();
380 let mut slice_group_change_direction_flag = false;
381 let mut slice_group_change_rate = 0u32;
382 let mut slice_group_id = Vec::new();
383
384 if num_slice_groups > 1 {
385 slice_group_map_type = r.read_ue()?;
386 match slice_group_map_type {
387 0 => {
388 for _ in 0..num_slice_groups {
390 run_length_minus1.push(r.read_ue()?);
391 }
392 }
393 2 => {
394 for _ in 0..num_slice_groups.saturating_sub(1) {
396 top_left.push(r.read_ue()?);
397 bottom_right.push(r.read_ue()?);
398 }
399 }
400 3..=5 => {
401 slice_group_change_direction_flag = r.read_bit()? == 1;
402 slice_group_change_rate = r.read_ue()? + 1;
403 }
404 6 => {
405 let pic_size_in_map_units = r.read_ue()? + 1;
406 let bits_needed = if num_slice_groups > 1 {
407 (32 - (num_slice_groups - 1).leading_zeros()).max(1) as u8
408 } else {
409 1
410 };
411 for _ in 0..pic_size_in_map_units {
412 slice_group_id.push(r.read_bits(bits_needed)?);
413 }
414 }
415 _ => {
416 }
418 }
419 }
420
421 let num_ref_idx_l0_default_active = r.read_ue()? + 1;
422 let num_ref_idx_l1_default_active = r.read_ue()? + 1;
423 let _weighted_pred_flag = r.read_bit()?;
425 let _weighted_bipred_idc = r.read_bits(2)?;
427 let pic_init_qp_minus26 = r.read_se()?;
429 let pic_init_qp = 26 + pic_init_qp_minus26;
430
431 Ok(Pps {
432 pps_id,
433 sps_id,
434 entropy_coding_mode_flag,
435 num_slice_groups,
436 slice_group_map_type,
437 run_length_minus1,
438 top_left,
439 bottom_right,
440 slice_group_change_direction_flag,
441 slice_group_change_rate,
442 slice_group_id,
443 num_ref_idx_l0_default_active,
444 num_ref_idx_l1_default_active,
445 pic_init_qp,
446 })
447}
448
449#[derive(Debug, Clone)]
455pub struct SliceHeader {
456 pub first_mb_in_slice: u32,
457 pub slice_type: u32,
458 pub pps_id: u32,
459 pub frame_num: u32,
460 pub field_pic_flag: bool,
462 pub bottom_field_flag: bool,
464 pub qp: i32,
465}
466
467fn parse_slice_header(
469 r: &mut BitstreamReader<'_>,
470 sps: &Sps,
471 pps: &Pps,
472 is_idr: bool,
473) -> Result<SliceHeader, VideoError> {
474 let first_mb_in_slice = r.read_ue()?;
475 let slice_type = r.read_ue()?;
476 let pps_id = r.read_ue()?;
477 let frame_num = r.read_bits(sps.log2_max_frame_num as u8)?;
478
479 let mut field_pic_flag = false;
480 let mut bottom_field_flag = false;
481 if !sps.frame_mbs_only_flag {
482 field_pic_flag = r.read_bit()? == 1;
483 if field_pic_flag {
484 bottom_field_flag = r.read_bit()? == 1;
485 }
486 }
487
488 if is_idr {
489 let _idr_pic_id = r.read_ue()?;
490 }
491
492 if sps.pic_order_cnt_type == 0 {
493 let log2_max_poc_lsb = sps.log2_max_frame_num; let _pic_order_cnt_lsb = r.read_bits(log2_max_poc_lsb as u8)?;
495 }
496
497 if is_idr {
499 let _no_output_of_prior_pics = r.read_bit()?;
500 let _long_term_reference_flag = r.read_bit()?;
501 }
502
503 let slice_qp_delta = r.read_se()?;
504 let qp = pps.pic_init_qp + slice_qp_delta;
505
506 Ok(SliceHeader {
507 first_mb_in_slice,
508 slice_type,
509 pps_id,
510 frame_num,
511 field_pic_flag,
512 bottom_field_flag,
513 qp,
514 })
515}
516
517pub fn inverse_dct_4x4(coeffs: &mut [i32; 16]) {
526 for i in 0..4 {
528 let base = i * 4;
529 let s0 = coeffs[base];
530 let s1 = coeffs[base + 1];
531 let s2 = coeffs[base + 2];
532 let s3 = coeffs[base + 3];
533
534 let e0 = s0 + s2;
535 let e1 = s0 - s2;
536 let e2 = (s1 >> 1) - s3;
537 let e3 = s1 + (s3 >> 1);
538
539 coeffs[base] = e0 + e3;
540 coeffs[base + 1] = e1 + e2;
541 coeffs[base + 2] = e1 - e2;
542 coeffs[base + 3] = e0 - e3;
543 }
544
545 for j in 0..4 {
547 let s0 = coeffs[j];
548 let s1 = coeffs[4 + j];
549 let s2 = coeffs[8 + j];
550 let s3 = coeffs[12 + j];
551
552 let e0 = s0 + s2;
553 let e1 = s0 - s2;
554 let e2 = (s1 >> 1) - s3;
555 let e3 = s1 + (s3 >> 1);
556
557 coeffs[j] = (e0 + e3 + 32) >> 6;
559 coeffs[4 + j] = (e1 + e2 + 32) >> 6;
560 coeffs[8 + j] = (e1 - e2 + 32) >> 6;
561 coeffs[12 + j] = (e0 - e3 + 32) >> 6;
562 }
563}
564
565const DEQUANT_SCALE: [[i32; 16]; 6] = [
572 [
573 10, 13, 10, 13, 13, 16, 13, 16, 10, 13, 10, 13, 13, 16, 13, 16,
574 ],
575 [
576 11, 14, 11, 14, 14, 18, 14, 18, 11, 14, 11, 14, 14, 18, 14, 18,
577 ],
578 [
579 13, 16, 13, 16, 16, 20, 16, 20, 13, 16, 13, 16, 16, 20, 16, 20,
580 ],
581 [
582 14, 18, 14, 18, 18, 23, 18, 23, 14, 18, 14, 18, 18, 23, 18, 23,
583 ],
584 [
585 16, 20, 16, 20, 20, 25, 20, 25, 16, 20, 16, 20, 20, 25, 20, 25,
586 ],
587 [
588 18, 23, 18, 23, 23, 29, 23, 29, 18, 23, 18, 23, 23, 29, 23, 29,
589 ],
590];
591
592pub fn dequant_4x4(coeffs: &mut [i32; 16], qp: i32) {
597 let qp = qp.clamp(0, 51);
598 let qp_div6 = (qp / 6) as u32;
599 let qp_mod6 = (qp % 6) as usize;
600 let scale = &DEQUANT_SCALE[qp_mod6];
601
602 for i in 0..16 {
603 coeffs[i] = (coeffs[i] * scale[i]) << qp_div6;
604 }
605}
606
607const ZIGZAG_4X4: [(usize, usize); 16] = [
613 (0, 0),
614 (0, 1),
615 (1, 0),
616 (2, 0),
617 (1, 1),
618 (0, 2),
619 (0, 3),
620 (1, 2),
621 (2, 1),
622 (3, 0),
623 (3, 1),
624 (2, 2),
625 (1, 3),
626 (2, 3),
627 (3, 2),
628 (3, 3),
629];
630
631fn unscan_4x4(scan_coeffs: &[i32], out: &mut [i32; 16]) {
633 *out = [0i32; 16];
634 for (scan_idx, &val) in scan_coeffs.iter().enumerate().take(16) {
635 let (r, c) = ZIGZAG_4X4[scan_idx];
636 out[r * 4 + c] = val;
637 }
638}
639
640fn decode_cavlc_on_reader(
649 bs: &mut BitstreamReader<'_>,
650 nc: i32,
651) -> Option<super::cavlc::CavlcResult> {
652 let start = bs.byte_offset;
653 let bit_off = bs.bit_offset;
654 let data = bs.data;
655 if start >= data.len() {
656 return None;
657 }
658 let slice = &data[start..];
659 let mut cr = super::cavlc::BitReader::new(slice);
660 if bit_off > 0 && cr.read_bits(bit_off).is_none() {
662 return None;
663 }
664 let result = super::cavlc::decode_cavlc_block(&mut cr, nc);
665 bs.byte_offset = start + cr.byte_pos;
667 bs.bit_offset = cr.bit_pos;
668 result
669}
670
671#[allow(clippy::too_many_arguments)]
682fn decode_macroblock(
683 reader: &mut BitstreamReader<'_>,
684 qp: i32,
685 mb_x: usize,
686 mb_y: usize,
687 y_plane: &mut [u8],
688 u_plane: &mut [u8],
689 v_plane: &mut [u8],
690 stride_y: usize,
691 stride_uv: usize,
692) -> Result<(), VideoError> {
693 let mb_type = reader.read_ue()?;
694
695 if mb_type == 25 {
696 if reader.bit_offset != 0 {
699 let skip = 8 - reader.bit_offset as usize;
700 reader.skip_bits(skip)?;
701 }
702 let px = mb_x * 16;
704 let py = mb_y * 16;
705 for row in 0..16 {
706 for col in 0..16 {
707 let val = reader.read_bits(8)? as u8;
708 let idx = (py + row) * stride_y + px + col;
709 if idx < y_plane.len() {
710 y_plane[idx] = val;
711 }
712 }
713 }
714 let cpx = mb_x * 8;
716 let cpy = mb_y * 8;
717 for row in 0..8 {
718 for col in 0..8 {
719 let val = reader.read_bits(8)? as u8;
720 let idx = (cpy + row) * stride_uv + cpx + col;
721 if idx < u_plane.len() {
722 u_plane[idx] = val;
723 }
724 }
725 }
726 for row in 0..8 {
727 for col in 0..8 {
728 let val = reader.read_bits(8)? as u8;
729 let idx = (cpy + row) * stride_uv + cpx + col;
730 if idx < v_plane.len() {
731 v_plane[idx] = val;
732 }
733 }
734 }
735 return Ok(());
736 }
737
738 let is_i16x16 = (1..=24).contains(&mb_type);
740 let is_i4x4 = mb_type == 0;
741
742 if is_i4x4 {
743 for _blk in 0..16 {
745 let prev_flag = reader.read_bit()?;
746 if prev_flag == 0 {
747 let _rem_mode = reader.read_bits(3)?;
748 }
749 }
750 }
751
752 let _chroma_pred_mode = reader.read_ue()?;
754
755 let cbp = if is_i16x16 {
757 let cbp_luma = if (mb_type - 1) / 12 >= 1 { 15 } else { 0 };
759 let cbp_chroma = ((mb_type - 1) / 4) % 3;
760 cbp_luma | (cbp_chroma << 4)
761 } else {
762 let cbp_code = reader.read_ue()?;
764 const CBP_INTRA: [u32; 48] = [
766 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, 16, 3, 5, 10, 12, 19, 21,
767 26, 28, 35, 37, 42, 44, 1, 2, 4, 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40,
768 38, 41,
769 ];
770 if (cbp_code as usize) < CBP_INTRA.len() {
771 CBP_INTRA[cbp_code as usize]
772 } else {
773 0
774 }
775 };
776
777 let qp = if cbp > 0 || is_i16x16 {
779 let qp_delta = reader.read_se()?;
780 (qp + qp_delta).rem_euclid(52)
781 } else {
782 qp
783 };
784
785 let px = mb_x * 16;
786 let py = mb_y * 16;
787
788 let mut luma_dc = [0i32; 16];
790 if is_i16x16 && let Some(result) = decode_cavlc_on_reader(reader, 0) {
791 let scan = super::cavlc::expand_cavlc_to_coefficients(&result, 16);
792 unscan_4x4(&scan, &mut luma_dc);
793 }
794
795 let luma_block_offsets: [(usize, usize); 16] = [
798 (0, 0),
799 (0, 4),
800 (4, 0),
801 (4, 4),
802 (0, 8),
803 (0, 12),
804 (4, 8),
805 (4, 12),
806 (8, 0),
807 (8, 4),
808 (12, 0),
809 (12, 4),
810 (8, 8),
811 (8, 12),
812 (12, 8),
813 (12, 12),
814 ];
815
816 for blk_idx in 0..16 {
817 let cbp_group = blk_idx / 4;
818 if cbp & (1 << cbp_group) == 0 && !is_i16x16 {
819 let dc_val = compute_dc_prediction_luma(
821 y_plane,
822 stride_y,
823 px + luma_block_offsets[blk_idx].1,
824 py + luma_block_offsets[blk_idx].0,
825 );
826 write_dc_block_luma(
827 y_plane,
828 stride_y,
829 px + luma_block_offsets[blk_idx].1,
830 py + luma_block_offsets[blk_idx].0,
831 dc_val,
832 );
833 continue;
834 }
835
836 let mut coeffs_scan = vec![0i32; 16];
837
838 if (cbp & (1 << cbp_group) != 0 || is_i16x16)
839 && let Some(result) = decode_cavlc_on_reader(reader, 0)
840 {
841 coeffs_scan = super::cavlc::expand_cavlc_to_coefficients(&result, 16);
842 }
843
844 let mut coeffs = [0i32; 16];
845 unscan_4x4(&coeffs_scan, &mut coeffs);
846
847 if is_i16x16 {
848 coeffs[0] = luma_dc[blk_idx];
849 }
850
851 dequant_4x4(&mut coeffs, qp);
852 inverse_dct_4x4(&mut coeffs);
853
854 let (boff_r, boff_c) = luma_block_offsets[blk_idx];
855 let block_x = px + boff_c;
856 let block_y = py + boff_r;
857 let dc_pred = compute_dc_prediction_luma(y_plane, stride_y, block_x, block_y);
858
859 for r in 0..4 {
860 for c in 0..4 {
861 let residual = coeffs[r * 4 + c];
862 let val = (dc_pred as i32 + residual).clamp(0, 255) as u8;
863 let idx = (block_y + r) * stride_y + block_x + c;
864 if idx < y_plane.len() {
865 y_plane[idx] = val;
866 }
867 }
868 }
869 }
870
871 let chroma_cbp = (cbp >> 4) & 3;
873 let cpx = mb_x * 8;
874 let cpy = mb_y * 8;
875 let chroma_block_offsets: [(usize, usize); 4] = [(0, 0), (0, 4), (4, 0), (4, 4)];
876
877 for plane_idx in 0..2 {
878 let plane = if plane_idx == 0 {
879 &mut *u_plane
880 } else {
881 &mut *v_plane
882 };
883
884 if chroma_cbp >= 1 {
886 let _dc_result = decode_cavlc_on_reader(reader, 0);
887 }
888
889 for blk_idx in 0..4 {
890 let (boff_r, boff_c) = chroma_block_offsets[blk_idx];
891 let block_x = cpx + boff_c;
892 let block_y = cpy + boff_r;
893
894 if chroma_cbp >= 2 {
895 if let Some(result) = decode_cavlc_on_reader(reader, 0) {
896 let coeffs_scan = super::cavlc::expand_cavlc_to_coefficients(&result, 16);
897 let mut coeffs = [0i32; 16];
898 unscan_4x4(&coeffs_scan, &mut coeffs);
899
900 let chroma_qp = chroma_qp_from_luma_qp(qp);
901 dequant_4x4(&mut coeffs, chroma_qp);
902 inverse_dct_4x4(&mut coeffs);
903
904 let dc_pred = compute_dc_prediction_chroma(plane, stride_uv, block_x, block_y);
905
906 for r in 0..4 {
907 for c in 0..4 {
908 let residual = coeffs[r * 4 + c];
909 let val = (dc_pred as i32 + residual).clamp(0, 255) as u8;
910 let idx = (block_y + r) * stride_uv + block_x + c;
911 if idx < plane.len() {
912 plane[idx] = val;
913 }
914 }
915 }
916 } else {
917 let dc_pred = compute_dc_prediction_chroma(plane, stride_uv, block_x, block_y);
918 write_dc_block_chroma(plane, stride_uv, block_x, block_y, dc_pred);
919 }
920 } else {
921 let dc_pred = compute_dc_prediction_chroma(plane, stride_uv, block_x, block_y);
922 write_dc_block_chroma(plane, stride_uv, block_x, block_y, dc_pred);
923 }
924 }
925 }
926
927 Ok(())
928}
929
930fn compute_dc_prediction_luma(plane: &[u8], stride: usize, bx: usize, by: usize) -> u8 {
932 let mut sum = 0u32;
933 let mut count = 0u32;
934
935 if by > 0 {
937 for c in 0..4 {
938 let idx = (by - 1) * stride + bx + c;
939 if idx < plane.len() {
940 sum += plane[idx] as u32;
941 count += 1;
942 }
943 }
944 }
945
946 if bx > 0 {
948 for r in 0..4 {
949 let idx = (by + r) * stride + bx - 1;
950 if idx < plane.len() {
951 sum += plane[idx] as u32;
952 count += 1;
953 }
954 }
955 }
956
957 if count > 0 { (sum / count) as u8 } else { 128 }
958}
959
960fn compute_dc_prediction_chroma(plane: &[u8], stride: usize, bx: usize, by: usize) -> u8 {
962 compute_dc_prediction_luma(plane, stride, bx, by)
963}
964
965fn write_dc_block_luma(plane: &mut [u8], stride: usize, bx: usize, by: usize, val: u8) {
967 for r in 0..4 {
968 for c in 0..4 {
969 let idx = (by + r) * stride + bx + c;
970 if idx < plane.len() {
971 plane[idx] = val;
972 }
973 }
974 }
975}
976
977fn write_dc_block_chroma(plane: &mut [u8], stride: usize, bx: usize, by: usize, val: u8) {
979 write_dc_block_luma(plane, stride, bx, by, val);
980}
981
982fn chroma_qp_from_luma_qp(qp_y: i32) -> i32 {
984 const QPC_TABLE: [i32; 52] = [
985 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
986 25, 26, 27, 28, 29, 29, 30, 31, 32, 32, 33, 34, 34, 35, 35, 36, 36, 37, 37, 37, 38, 38, 38,
987 39, 39, 39, 39,
988 ];
989 let idx = qp_y.clamp(0, 51) as usize;
990 QPC_TABLE[idx]
991}
992
993pub fn deinterlace_fields(
1003 top_field: &[u8],
1004 bottom_field: &[u8],
1005 width: usize,
1006 height: usize,
1007) -> Vec<u8> {
1008 let row_bytes = width * 3; let mut frame = vec![0u8; height * 2 * row_bytes];
1010 for y in 0..height {
1011 let dst_even_start = y * 2 * row_bytes;
1013 let src_top_start = y * row_bytes;
1014 if src_top_start + row_bytes <= top_field.len() && dst_even_start + row_bytes <= frame.len()
1015 {
1016 frame[dst_even_start..dst_even_start + row_bytes]
1017 .copy_from_slice(&top_field[src_top_start..src_top_start + row_bytes]);
1018 }
1019 let dst_odd_start = (y * 2 + 1) * row_bytes;
1021 let src_bot_start = y * row_bytes;
1022 if src_bot_start + row_bytes <= bottom_field.len()
1023 && dst_odd_start + row_bytes <= frame.len()
1024 {
1025 frame[dst_odd_start..dst_odd_start + row_bytes]
1026 .copy_from_slice(&bottom_field[src_bot_start..src_bot_start + row_bytes]);
1027 }
1028 }
1029 frame
1030}
1031
1032pub fn generate_slice_group_map(pps: &Pps, sps: &Sps) -> Vec<u8> {
1042 let pic_width = sps.pic_width_in_mbs as usize;
1043 let pic_height = sps.pic_height_in_map_units as usize;
1044 let num_mbs = pic_width * pic_height;
1045 let mut map = vec![0u8; num_mbs];
1046
1047 if pps.num_slice_groups <= 1 {
1048 return map; }
1050
1051 let num_groups = pps.num_slice_groups as usize;
1052
1053 match pps.slice_group_map_type {
1054 0 => {
1055 let mut i = 0;
1057 loop {
1058 if i >= num_mbs {
1059 break;
1060 }
1061 for group in 0..num_groups {
1062 let run = if group < pps.run_length_minus1.len() {
1063 pps.run_length_minus1[group] as usize + 1
1064 } else {
1065 1
1066 };
1067 for _ in 0..run {
1068 if i >= num_mbs {
1069 break;
1070 }
1071 map[i] = group as u8;
1072 i += 1;
1073 }
1074 }
1075 }
1076 }
1077 1 => {
1078 for i in 0..num_mbs {
1080 let x = i % pic_width;
1081 let y = i / pic_width;
1082 let group = ((x + ((y * num_groups) / 2)) % num_groups) as u8;
1083 map[i] = group;
1084 }
1085 }
1086 2 => {
1087 let bg_group = (num_groups - 1) as u8;
1090 for m in map.iter_mut() {
1091 *m = bg_group;
1092 }
1093 for group in (0..num_groups.saturating_sub(1)).rev() {
1095 if group >= pps.top_left.len() || group >= pps.bottom_right.len() {
1096 continue;
1097 }
1098 let tl = pps.top_left[group] as usize;
1099 let br = pps.bottom_right[group] as usize;
1100 let tl_x = tl % pic_width;
1101 let tl_y = tl / pic_width;
1102 let br_x = br % pic_width;
1103 let br_y = br / pic_width;
1104 for y in tl_y..=br_y.min(pic_height.saturating_sub(1)) {
1105 for x in tl_x..=br_x.min(pic_width.saturating_sub(1)) {
1106 let idx = y * pic_width + x;
1107 if idx < num_mbs {
1108 map[idx] = group as u8;
1109 }
1110 }
1111 }
1112 }
1113 }
1114 3..=5 => {
1115 let change = (pps.slice_group_change_rate as usize).min(num_mbs);
1122 for (i, m) in map.iter_mut().enumerate() {
1123 *m = if i < change { 0 } else { 1 };
1124 }
1125 }
1126 6 => {
1127 for (i, m) in map.iter_mut().enumerate() {
1129 if i < pps.slice_group_id.len() {
1130 *m = pps.slice_group_id[i] as u8;
1131 }
1132 }
1133 }
1134 _ => {
1135 }
1137 }
1138
1139 map
1140}
1141
1142pub fn chroma_dimensions(width: usize, height: usize, chroma_format: u32) -> (usize, usize) {
1154 match chroma_format {
1155 0 => (0, 0), 1 => (width / 2, height / 2), 2 => (width / 2, height), 3 => (width, height), _ => (width / 2, height / 2), }
1161}
1162
1163pub fn yuv422_to_rgb8(
1167 y_plane: &[u8],
1168 u_plane: &[u8],
1169 v_plane: &[u8],
1170 width: usize,
1171 height: usize,
1172) -> Result<Vec<u8>, VideoError> {
1173 let expected_y = width * height;
1174 let expected_uv = (width / 2) * height;
1175
1176 if y_plane.len() < expected_y {
1177 return Err(VideoError::Codec(format!(
1178 "Y plane too small: expected {expected_y}, got {}",
1179 y_plane.len()
1180 )));
1181 }
1182 if u_plane.len() < expected_uv || v_plane.len() < expected_uv {
1183 return Err(VideoError::Codec(format!(
1184 "UV planes too small for 4:2:2: expected {expected_uv}, got U={} V={}",
1185 u_plane.len(),
1186 v_plane.len()
1187 )));
1188 }
1189
1190 let mut rgb = vec![0u8; width * height * 3];
1191 let uv_stride = width / 2;
1192
1193 for row in 0..height {
1194 let y_off = row * width;
1195 let uv_off = row * uv_stride;
1196
1197 for col in 0..width {
1198 let y_val = y_plane[y_off + col] as i16;
1199 let u_val = u_plane[uv_off + col / 2] as i16 - 128;
1200 let v_val = v_plane[uv_off + col / 2] as i16 - 128;
1201
1202 let r = y_val + ((v_val * 179) >> 7);
1203 let g = y_val - ((u_val * 44 + v_val * 91) >> 7);
1204 let b = y_val + ((u_val * 227) >> 7);
1205
1206 let idx = (row * width + col) * 3;
1207 rgb[idx] = r.clamp(0, 255) as u8;
1208 rgb[idx + 1] = g.clamp(0, 255) as u8;
1209 rgb[idx + 2] = b.clamp(0, 255) as u8;
1210 }
1211 }
1212
1213 Ok(rgb)
1214}
1215
1216pub fn yuv444_to_rgb8(
1220 y_plane: &[u8],
1221 u_plane: &[u8],
1222 v_plane: &[u8],
1223 width: usize,
1224 height: usize,
1225) -> Result<Vec<u8>, VideoError> {
1226 let expected = width * height;
1227
1228 if y_plane.len() < expected {
1229 return Err(VideoError::Codec(format!(
1230 "Y plane too small: expected {expected}, got {}",
1231 y_plane.len()
1232 )));
1233 }
1234 if u_plane.len() < expected || v_plane.len() < expected {
1235 return Err(VideoError::Codec(format!(
1236 "UV planes too small for 4:4:4: expected {expected}, got U={} V={}",
1237 u_plane.len(),
1238 v_plane.len()
1239 )));
1240 }
1241
1242 let mut rgb = vec![0u8; width * height * 3];
1243
1244 for i in 0..expected {
1245 let y_val = y_plane[i] as i16;
1246 let u_val = u_plane[i] as i16 - 128;
1247 let v_val = v_plane[i] as i16 - 128;
1248
1249 let r = y_val + ((v_val * 179) >> 7);
1250 let g = y_val - ((u_val * 44 + v_val * 91) >> 7);
1251 let b = y_val + ((u_val * 227) >> 7);
1252
1253 let idx = i * 3;
1254 rgb[idx] = r.clamp(0, 255) as u8;
1255 rgb[idx + 1] = g.clamp(0, 255) as u8;
1256 rgb[idx + 2] = b.clamp(0, 255) as u8;
1257 }
1258
1259 Ok(rgb)
1260}
1261
1262pub fn mono_to_rgb8(y_plane: &[u8], width: usize, height: usize) -> Result<Vec<u8>, VideoError> {
1264 let expected = width * height;
1265 if y_plane.len() < expected {
1266 return Err(VideoError::Codec(format!(
1267 "Y plane too small: expected {expected}, got {}",
1268 y_plane.len()
1269 )));
1270 }
1271 let mut rgb = vec![0u8; expected * 3];
1272 for i in 0..expected {
1273 let v = y_plane[i];
1274 let idx = i * 3;
1275 rgb[idx] = v;
1276 rgb[idx + 1] = v;
1277 rgb[idx + 2] = v;
1278 }
1279 Ok(rgb)
1280}
1281
1282fn yuv_to_rgb8_by_format(
1284 y_plane: &[u8],
1285 u_plane: &[u8],
1286 v_plane: &[u8],
1287 width: usize,
1288 height: usize,
1289 chroma_format_idc: u32,
1290) -> Result<Vec<u8>, VideoError> {
1291 match chroma_format_idc {
1292 0 => mono_to_rgb8(y_plane, width, height),
1293 1 => yuv420_to_rgb8(y_plane, u_plane, v_plane, width, height),
1294 2 => yuv422_to_rgb8(y_plane, u_plane, v_plane, width, height),
1295 3 => yuv444_to_rgb8(y_plane, u_plane, v_plane, width, height),
1296 _ => yuv420_to_rgb8(y_plane, u_plane, v_plane, width, height),
1297 }
1298}
1299
1300pub struct H264Decoder {
1314 sps: Option<Sps>,
1315 pps: Option<Pps>,
1316 _pending_nals: Vec<NalUnit>,
1317 pending_top_field: Option<PendingField>,
1319}
1320
1321#[derive(Debug, Clone)]
1323#[allow(dead_code)]
1324struct PendingField {
1325 rgb_data: Vec<u8>,
1326 width: usize,
1327 height: usize,
1328 timestamp_us: u64,
1329}
1330
1331impl H264Decoder {
1332 pub fn new() -> Self {
1333 Self {
1334 sps: None,
1335 pps: None,
1336 _pending_nals: Vec::new(),
1337 pending_top_field: None,
1338 }
1339 }
1340
1341 pub fn process_nal(&mut self, nal: &NalUnit) -> Result<Option<DecodedFrame>, VideoError> {
1342 match nal.nal_type {
1343 NalUnitType::Sps => {
1344 let sps_data = if nal.data.len() > 1 {
1346 &nal.data[1..]
1347 } else {
1348 &nal.data
1349 };
1350 self.sps = Some(parse_sps(sps_data)?);
1351 Ok(None)
1352 }
1353 NalUnitType::Pps => {
1354 let pps_data = if nal.data.len() > 1 {
1355 &nal.data[1..]
1356 } else {
1357 &nal.data
1358 };
1359 self.pps = Some(parse_pps(pps_data)?);
1360 Ok(None)
1361 }
1362 NalUnitType::Idr => {
1363 if nal.data.len() < 2 {
1364 return Err(VideoError::Codec("IDR NAL unit too short".into()));
1365 }
1366
1367 let sps = self
1368 .sps
1369 .as_ref()
1370 .ok_or_else(|| VideoError::Codec("IDR received before SPS".into()))?
1371 .clone();
1372 let pps = self
1373 .pps
1374 .as_ref()
1375 .ok_or_else(|| VideoError::Codec("IDR received before PPS".into()))?
1376 .clone();
1377
1378 let w = sps.cropped_width();
1379 let h = sps.cropped_height();
1380
1381 if w == 0 || h == 0 {
1383 return Err(VideoError::Codec(
1384 "SPS yields zero-sized frame dimensions".into(),
1385 ));
1386 }
1387 if w > 16384 || h > 16384 {
1388 return Err(VideoError::Codec(format!(
1389 "SPS frame dimensions too large: {w}x{h} (max 16384x16384)"
1390 )));
1391 }
1392
1393 let mb_w = sps.pic_width_in_mbs as usize;
1394 let mb_h = sps.pic_height_in_map_units as usize;
1395 let full_w = mb_w
1396 .checked_mul(16)
1397 .ok_or_else(|| VideoError::Codec("macroblock width overflow".into()))?;
1398 let full_h = mb_h
1399 .checked_mul(16)
1400 .ok_or_else(|| VideoError::Codec("macroblock height overflow".into()))?;
1401
1402 let rbsp = remove_emulation_prevention(&nal.data[1..]);
1404 let mut reader = BitstreamReader::new(&rbsp);
1405
1406 let slice_header = match parse_slice_header(&mut reader, &sps, &pps, true) {
1407 Ok(sh) => sh,
1408 Err(_) => {
1409 let rgb8_data = vec![128u8; w * h * 3];
1411 return Ok(Some(DecodedFrame {
1412 width: w,
1413 height: h,
1414 rgb8_data,
1415 timestamp_us: 0,
1416 keyframe: true,
1417 }));
1418 }
1419 };
1420
1421 let (chroma_w, chroma_h) = chroma_dimensions(full_w, full_h, sps.chroma_format_idc);
1423
1424 let mut y_plane = vec![128u8; full_w * full_h];
1426 let mut u_plane = vec![128u8; chroma_w.max(1) * chroma_h.max(1)];
1427 let mut v_plane = vec![128u8; chroma_w.max(1) * chroma_h.max(1)];
1428
1429 let stride_y = full_w;
1430 let stride_uv = chroma_w.max(1);
1431
1432 let _slice_group_map = generate_slice_group_map(&pps, &sps);
1434
1435 for mb_idx in 0..(mb_w * mb_h) {
1438 let mb_x = mb_idx % mb_w;
1439 let mb_y = mb_idx / mb_w;
1440
1441 if reader.bits_remaining() < 8 {
1442 break;
1443 }
1444
1445 if decode_macroblock(
1446 &mut reader,
1447 slice_header.qp,
1448 mb_x,
1449 mb_y,
1450 &mut y_plane,
1451 &mut u_plane,
1452 &mut v_plane,
1453 stride_y,
1454 stride_uv,
1455 )
1456 .is_err()
1457 {
1458 break;
1459 }
1460 }
1461
1462 let rgb8_full = yuv_to_rgb8_by_format(
1464 &y_plane,
1465 &u_plane,
1466 &v_plane,
1467 full_w,
1468 full_h,
1469 sps.chroma_format_idc,
1470 )?;
1471
1472 let rgb8_data = if full_w == w && full_h == h {
1474 rgb8_full
1475 } else if w <= full_w && h <= full_h {
1476 let mut cropped = vec![0u8; w * h * 3];
1477 for row in 0..h {
1478 let src_start = row * full_w * 3;
1479 let dst_start = row * w * 3;
1480 if src_start + w * 3 <= rgb8_full.len()
1481 && dst_start + w * 3 <= cropped.len()
1482 {
1483 cropped[dst_start..dst_start + w * 3]
1484 .copy_from_slice(&rgb8_full[src_start..src_start + w * 3]);
1485 }
1486 }
1487 cropped
1488 } else {
1489 return Err(VideoError::Codec(
1490 "cropped dimensions exceed full frame size".into(),
1491 ));
1492 };
1493
1494 if slice_header.field_pic_flag {
1496 if !slice_header.bottom_field_flag {
1497 self.pending_top_field = Some(PendingField {
1499 rgb_data: rgb8_data,
1500 width: w,
1501 height: h,
1502 timestamp_us: 0,
1503 });
1504 return Ok(None);
1505 }
1506 if let Some(top) = self.pending_top_field.take() {
1508 let frame_h = top.height + h;
1509 let deinterlaced =
1510 deinterlace_fields(&top.rgb_data, &rgb8_data, w, h.min(top.height));
1511 return Ok(Some(DecodedFrame {
1512 width: w,
1513 height: frame_h,
1514 rgb8_data: deinterlaced,
1515 timestamp_us: top.timestamp_us,
1516 keyframe: true,
1517 }));
1518 }
1519 }
1521
1522 Ok(Some(DecodedFrame {
1523 width: w,
1524 height: h,
1525 rgb8_data,
1526 timestamp_us: 0,
1527 keyframe: true,
1528 }))
1529 }
1530 _ => Ok(None),
1531 }
1532 }
1533}
1534
1535impl Default for H264Decoder {
1536 fn default() -> Self {
1537 Self::new()
1538 }
1539}
1540
1541impl VideoDecoder for H264Decoder {
1542 fn codec(&self) -> VideoCodec {
1543 VideoCodec::H264
1544 }
1545
1546 fn decode(
1547 &mut self,
1548 data: &[u8],
1549 timestamp_us: u64,
1550 ) -> Result<Option<DecodedFrame>, VideoError> {
1551 let nals = crate::parse_annex_b(data);
1552 let mut last_frame = None;
1553
1554 for nal in &nals {
1555 if let Some(mut frame) = self.process_nal(nal)? {
1556 frame.timestamp_us = timestamp_us;
1557 last_frame = Some(frame);
1558 }
1559 }
1560
1561 Ok(last_frame)
1562 }
1563
1564 fn flush(&mut self) -> Result<Vec<DecodedFrame>, VideoError> {
1565 Ok(Vec::new())
1567 }
1568}
1569
1570#[allow(unsafe_code)]
1582pub fn yuv420_to_rgb8(
1583 y_plane: &[u8],
1584 u_plane: &[u8],
1585 v_plane: &[u8],
1586 width: usize,
1587 height: usize,
1588) -> Result<Vec<u8>, VideoError> {
1589 let expected_y = width * height;
1590 let expected_uv = (width / 2) * (height / 2);
1591
1592 if y_plane.len() < expected_y {
1593 return Err(VideoError::Codec(format!(
1594 "Y plane too small: expected {expected_y}, got {}",
1595 y_plane.len()
1596 )));
1597 }
1598 if u_plane.len() < expected_uv || v_plane.len() < expected_uv {
1599 return Err(VideoError::Codec(format!(
1600 "UV planes too small: expected {expected_uv}, got U={} V={}",
1601 u_plane.len(),
1602 v_plane.len()
1603 )));
1604 }
1605
1606 let mut rgb = vec![0u8; width * height * 3];
1607 let uv_stride = width / 2;
1608
1609 if height < 4 {
1610 yuv420_to_rgb8_rows(
1612 y_plane, u_plane, v_plane, &mut rgb, width, uv_stride, 0, height,
1613 );
1614 } else {
1615 use rayon::prelude::*;
1619
1620 let row_bytes = width * 3;
1621 rgb.par_chunks_mut(row_bytes)
1622 .enumerate()
1623 .for_each(|(row_idx, row_slice)| {
1624 yuv420_to_rgb8_rows(
1625 y_plane,
1626 u_plane,
1627 v_plane,
1628 row_slice,
1629 width,
1630 uv_stride,
1631 row_idx,
1632 row_idx + 1,
1633 );
1634 });
1635 }
1636
1637 Ok(rgb)
1638}
1639
1640#[inline]
1643#[allow(unsafe_code)]
1644fn yuv420_to_rgb8_rows(
1645 y_plane: &[u8],
1646 u_plane: &[u8],
1647 v_plane: &[u8],
1648 rgb_out: &mut [u8],
1649 width: usize,
1650 uv_stride: usize,
1651 start_row: usize,
1652 end_row: usize,
1653) {
1654 #[cfg(target_arch = "aarch64")]
1655 {
1656 if std::arch::is_aarch64_feature_detected!("neon") {
1657 unsafe {
1659 yuv420_to_rgb8_rows_neon(
1660 y_plane, u_plane, v_plane, rgb_out, width, uv_stride, start_row, end_row,
1661 );
1662 }
1663 return;
1664 }
1665 }
1666
1667 #[cfg(target_arch = "x86_64")]
1668 {
1669 if is_x86_feature_detected!("avx2") {
1670 unsafe {
1671 yuv420_to_rgb8_rows_avx2(
1672 y_plane, u_plane, v_plane, rgb_out, width, uv_stride, start_row, end_row,
1673 );
1674 }
1675 return;
1676 }
1677 if is_x86_feature_detected!("sse2") {
1678 unsafe {
1679 yuv420_to_rgb8_rows_sse2(
1680 y_plane, u_plane, v_plane, rgb_out, width, uv_stride, start_row, end_row,
1681 );
1682 }
1683 return;
1684 }
1685 }
1686
1687 yuv420_to_rgb8_rows_scalar(
1688 y_plane, u_plane, v_plane, rgb_out, width, uv_stride, start_row, end_row,
1689 );
1690}
1691
1692#[inline]
1694fn yuv420_to_rgb8_rows_scalar(
1695 y_plane: &[u8],
1696 u_plane: &[u8],
1697 v_plane: &[u8],
1698 rgb_out: &mut [u8],
1699 width: usize,
1700 uv_stride: usize,
1701 start_row: usize,
1702 end_row: usize,
1703) {
1704 for row in start_row..end_row {
1710 let out_row = row - start_row;
1711 let y_row_off = row * width;
1712 let uv_row_off = (row / 2) * uv_stride;
1713
1714 for col in 0..width {
1715 let y_val = y_plane[y_row_off + col] as i16;
1716 let u_val = u_plane[uv_row_off + col / 2] as i16 - 128;
1717 let v_val = v_plane[uv_row_off + col / 2] as i16 - 128;
1718
1719 let r = y_val + ((v_val * 179) >> 7);
1720 let g = y_val - ((u_val * 44 + v_val * 91) >> 7);
1721 let b = y_val + ((u_val * 227) >> 7);
1722
1723 let idx = (out_row * width + col) * 3;
1724 rgb_out[idx] = r.clamp(0, 255) as u8;
1725 rgb_out[idx + 1] = g.clamp(0, 255) as u8;
1726 rgb_out[idx + 2] = b.clamp(0, 255) as u8;
1727 }
1728 }
1729}
1730
1731#[cfg(target_arch = "aarch64")]
1734#[target_feature(enable = "neon")]
1735#[allow(unsafe_code, unsafe_op_in_unsafe_fn)]
1736unsafe fn yuv420_to_rgb8_rows_neon(
1737 y_plane: &[u8],
1738 u_plane: &[u8],
1739 v_plane: &[u8],
1740 rgb_out: &mut [u8],
1741 width: usize,
1742 uv_stride: usize,
1743 start_row: usize,
1744 end_row: usize,
1745) {
1746 use std::arch::aarch64::*;
1747
1748 let c_179 = vdupq_n_s16(179); let c_44 = vdupq_n_s16(44); let c_91 = vdupq_n_s16(91); let c_227 = vdupq_n_s16(227); let c_128 = vdupq_n_s16(128);
1754
1755 for row in start_row..end_row {
1756 let out_row = row - start_row;
1757 let y_row_ptr = y_plane.as_ptr().add(row * width);
1758 let uv_row = (row / 2) * uv_stride;
1759 let u_row_ptr = u_plane.as_ptr().add(uv_row);
1760 let v_row_ptr = v_plane.as_ptr().add(uv_row);
1761 let rgb_row_ptr = rgb_out.as_mut_ptr().add(out_row * width * 3);
1762
1763 let mut col = 0usize;
1764
1765 while col + 16 <= width {
1767 let y16 = vld1q_u8(y_row_ptr.add(col));
1769 let u8_vals = vld1_u8(u_row_ptr.add(col / 2));
1771 let v8_vals = vld1_u8(v_row_ptr.add(col / 2));
1772
1773 let u16_dup = vcombine_u8(vzip1_u8(u8_vals, u8_vals), vzip2_u8(u8_vals, u8_vals));
1775 let v16_dup = vcombine_u8(vzip1_u8(v8_vals, v8_vals), vzip2_u8(v8_vals, v8_vals));
1776
1777 let y_lo = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(y16)));
1779 let u_lo = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(u16_dup))), c_128);
1780 let v_lo = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v16_dup))), c_128);
1781
1782 let r_lo = vaddq_s16(y_lo, vshrq_n_s16::<7>(vmulq_s16(v_lo, c_179)));
1784 let g_lo = vsubq_s16(
1786 y_lo,
1787 vshrq_n_s16::<7>(vaddq_s16(vmulq_s16(u_lo, c_44), vmulq_s16(v_lo, c_91))),
1788 );
1789 let b_lo = vaddq_s16(y_lo, vshrq_n_s16::<7>(vmulq_s16(u_lo, c_227)));
1791
1792 let r_lo_u8 = vqmovun_s16(r_lo);
1794 let g_lo_u8 = vqmovun_s16(g_lo);
1795 let b_lo_u8 = vqmovun_s16(b_lo);
1796
1797 let y_hi = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(y16)));
1799 let u_hi = vsubq_s16(
1800 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(u16_dup))),
1801 c_128,
1802 );
1803 let v_hi = vsubq_s16(
1804 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v16_dup))),
1805 c_128,
1806 );
1807
1808 let r_hi = vaddq_s16(y_hi, vshrq_n_s16::<7>(vmulq_s16(v_hi, c_179)));
1809 let g_hi = vsubq_s16(
1810 y_hi,
1811 vshrq_n_s16::<7>(vaddq_s16(vmulq_s16(u_hi, c_44), vmulq_s16(v_hi, c_91))),
1812 );
1813 let b_hi = vaddq_s16(y_hi, vshrq_n_s16::<7>(vmulq_s16(u_hi, c_227)));
1814
1815 let r_hi_u8 = vqmovun_s16(r_hi);
1816 let g_hi_u8 = vqmovun_s16(g_hi);
1817 let b_hi_u8 = vqmovun_s16(b_hi);
1818
1819 let rgb_lo = uint8x8x3_t(r_lo_u8, g_lo_u8, b_lo_u8);
1821 vst3_u8(rgb_row_ptr.add(col * 3), rgb_lo);
1822
1823 let rgb_hi = uint8x8x3_t(r_hi_u8, g_hi_u8, b_hi_u8);
1824 vst3_u8(rgb_row_ptr.add((col + 8) * 3), rgb_hi);
1825
1826 col += 16;
1827 }
1828
1829 if col + 8 <= width {
1831 let y8_vals = vld1_u8(y_row_ptr.add(col));
1832 let u4_vals_raw = u_row_ptr.add(col / 2);
1833 let v4_vals_raw = v_row_ptr.add(col / 2);
1834
1835 let mut u_buf = [0u8; 8];
1837 let mut v_buf = [0u8; 8];
1838 for i in 0..4 {
1839 u_buf[i * 2] = *u4_vals_raw.add(i);
1840 u_buf[i * 2 + 1] = *u4_vals_raw.add(i);
1841 v_buf[i * 2] = *v4_vals_raw.add(i);
1842 v_buf[i * 2 + 1] = *v4_vals_raw.add(i);
1843 }
1844 let u8_dup = vld1_u8(u_buf.as_ptr());
1845 let v8_dup = vld1_u8(v_buf.as_ptr());
1846
1847 let y_i16 = vreinterpretq_s16_u16(vmovl_u8(y8_vals));
1848 let u_i16 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u8_dup)), c_128);
1849 let v_i16 = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v8_dup)), c_128);
1850
1851 let r = vaddq_s16(y_i16, vshrq_n_s16::<7>(vmulq_s16(v_i16, c_179)));
1852 let g = vsubq_s16(
1853 y_i16,
1854 vshrq_n_s16::<7>(vaddq_s16(vmulq_s16(u_i16, c_44), vmulq_s16(v_i16, c_91))),
1855 );
1856 let b = vaddq_s16(y_i16, vshrq_n_s16::<7>(vmulq_s16(u_i16, c_227)));
1857
1858 let r_u8 = vqmovun_s16(r);
1859 let g_u8 = vqmovun_s16(g);
1860 let b_u8 = vqmovun_s16(b);
1861
1862 let rgb = uint8x8x3_t(r_u8, g_u8, b_u8);
1863 vst3_u8(rgb_row_ptr.add(col * 3), rgb);
1864
1865 col += 8;
1866 }
1867
1868 while col < width {
1870 let y_val = *y_row_ptr.add(col) as i16;
1871 let u_val = *u_row_ptr.add(col / 2) as i16 - 128;
1872 let v_val = *v_row_ptr.add(col / 2) as i16 - 128;
1873
1874 let r = y_val + ((v_val * 179) >> 7);
1875 let g = y_val - (((u_val * 44) + (v_val * 91)) >> 7);
1876 let b = y_val + ((u_val * 227) >> 7);
1877
1878 let idx = col * 3;
1879 *rgb_row_ptr.add(idx) = r.clamp(0, 255) as u8;
1880 *rgb_row_ptr.add(idx + 1) = g.clamp(0, 255) as u8;
1881 *rgb_row_ptr.add(idx + 2) = b.clamp(0, 255) as u8;
1882
1883 col += 1;
1884 }
1885 }
1886}
1887
1888#[cfg(target_arch = "x86_64")]
1891#[target_feature(enable = "avx2")]
1892#[allow(unsafe_code, unsafe_op_in_unsafe_fn)]
1893unsafe fn yuv420_to_rgb8_rows_avx2(
1894 y_plane: &[u8],
1895 u_plane: &[u8],
1896 v_plane: &[u8],
1897 rgb_out: &mut [u8],
1898 width: usize,
1899 uv_stride: usize,
1900 start_row: usize,
1901 end_row: usize,
1902) {
1903 use std::arch::x86_64::*;
1904
1905 let c_179 = _mm256_set1_epi16(179);
1907 let c_44 = _mm256_set1_epi16(44);
1908 let c_91 = _mm256_set1_epi16(91);
1909 let c_227 = _mm256_set1_epi16(227);
1910 let c_128 = _mm256_set1_epi16(128);
1911 let zero = _mm256_setzero_si256();
1912
1913 for row in start_row..end_row {
1914 let out_row = row - start_row;
1915 let y_row_ptr = y_plane.as_ptr().add(row * width);
1916 let uv_row = (row / 2) * uv_stride;
1917 let u_row_ptr = u_plane.as_ptr().add(uv_row);
1918 let v_row_ptr = v_plane.as_ptr().add(uv_row);
1919 let rgb_row_ptr = rgb_out.as_mut_ptr().add(out_row * width * 3);
1920
1921 let mut col = 0usize;
1922
1923 while col + 16 <= width {
1925 let y16 = _mm_loadu_si128(y_row_ptr.add(col) as *const __m128i);
1927 let y_lo = _mm256_cvtepu8_epi16(y16);
1928
1929 let mut u_buf = [0u8; 16];
1931 let mut v_buf = [0u8; 16];
1932 for i in 0..8 {
1933 u_buf[i * 2] = *u_row_ptr.add(col / 2 + i);
1934 u_buf[i * 2 + 1] = *u_row_ptr.add(col / 2 + i);
1935 v_buf[i * 2] = *v_row_ptr.add(col / 2 + i);
1936 v_buf[i * 2 + 1] = *v_row_ptr.add(col / 2 + i);
1937 }
1938 let u16_raw = _mm_loadu_si128(u_buf.as_ptr() as *const __m128i);
1939 let v16_raw = _mm_loadu_si128(v_buf.as_ptr() as *const __m128i);
1940
1941 let u_i16 = _mm256_sub_epi16(_mm256_cvtepu8_epi16(u16_raw), c_128);
1942 let v_i16 = _mm256_sub_epi16(_mm256_cvtepu8_epi16(v16_raw), c_128);
1943
1944 let r = _mm256_add_epi16(
1946 y_lo,
1947 _mm256_srai_epi16::<7>(_mm256_mullo_epi16(v_i16, c_179)),
1948 );
1949 let g = _mm256_sub_epi16(
1951 y_lo,
1952 _mm256_srai_epi16::<7>(_mm256_add_epi16(
1953 _mm256_mullo_epi16(u_i16, c_44),
1954 _mm256_mullo_epi16(v_i16, c_91),
1955 )),
1956 );
1957 let b = _mm256_add_epi16(
1959 y_lo,
1960 _mm256_srai_epi16::<7>(_mm256_mullo_epi16(u_i16, c_227)),
1961 );
1962
1963 let r_packed = _mm256_packus_epi16(r, zero);
1966 let g_packed = _mm256_packus_epi16(g, zero);
1967 let b_packed = _mm256_packus_epi16(b, zero);
1968
1969 let r_perm = _mm256_permute4x64_epi64::<0xD8>(r_packed);
1971 let g_perm = _mm256_permute4x64_epi64::<0xD8>(g_packed);
1972 let b_perm = _mm256_permute4x64_epi64::<0xD8>(b_packed);
1973
1974 let r_lo128 = _mm256_castsi256_si128(r_perm);
1975 let g_lo128 = _mm256_castsi256_si128(g_perm);
1976 let b_lo128 = _mm256_castsi256_si128(b_perm);
1977
1978 let mut r_arr = [0u8; 16];
1980 let mut g_arr = [0u8; 16];
1981 let mut b_arr = [0u8; 16];
1982 _mm_storeu_si128(r_arr.as_mut_ptr() as *mut __m128i, r_lo128);
1983 _mm_storeu_si128(g_arr.as_mut_ptr() as *mut __m128i, g_lo128);
1984 _mm_storeu_si128(b_arr.as_mut_ptr() as *mut __m128i, b_lo128);
1985
1986 let mut rgb_buf = [0u8; 48];
1987 for i in 0..16 {
1988 rgb_buf[i * 3] = r_arr[i];
1989 rgb_buf[i * 3 + 1] = g_arr[i];
1990 rgb_buf[i * 3 + 2] = b_arr[i];
1991 }
1992 std::ptr::copy_nonoverlapping(rgb_buf.as_ptr(), rgb_row_ptr.add(col * 3), 48);
1993
1994 col += 16;
1995 }
1996
1997 while col + 8 <= width {
1999 let y8 = _mm_loadl_epi64(y_row_ptr.add(col) as *const __m128i);
2000 let zero128 = _mm_setzero_si128();
2001 let y_i16 = _mm_unpacklo_epi8(y8, zero128);
2002
2003 let c_179_128 = _mm_set1_epi16(179);
2004 let c_44_128 = _mm_set1_epi16(44);
2005 let c_91_128 = _mm_set1_epi16(91);
2006 let c_227_128 = _mm_set1_epi16(227);
2007 let c_128_128 = _mm_set1_epi16(128);
2008
2009 let mut u_buf = [0u8; 8];
2010 let mut v_buf = [0u8; 8];
2011 for i in 0..4 {
2012 u_buf[i * 2] = *u_row_ptr.add(col / 2 + i);
2013 u_buf[i * 2 + 1] = *u_row_ptr.add(col / 2 + i);
2014 v_buf[i * 2] = *v_row_ptr.add(col / 2 + i);
2015 v_buf[i * 2 + 1] = *v_row_ptr.add(col / 2 + i);
2016 }
2017 let u8_dup = _mm_loadl_epi64(u_buf.as_ptr() as *const __m128i);
2018 let v8_dup = _mm_loadl_epi64(v_buf.as_ptr() as *const __m128i);
2019
2020 let u_i16 = _mm_sub_epi16(_mm_unpacklo_epi8(u8_dup, zero128), c_128_128);
2021 let v_i16 = _mm_sub_epi16(_mm_unpacklo_epi8(v8_dup, zero128), c_128_128);
2022
2023 let r = _mm_add_epi16(
2024 y_i16,
2025 _mm_srai_epi16::<7>(_mm_mullo_epi16(v_i16, c_179_128)),
2026 );
2027 let g = _mm_sub_epi16(
2028 y_i16,
2029 _mm_srai_epi16::<7>(_mm_add_epi16(
2030 _mm_mullo_epi16(u_i16, c_44_128),
2031 _mm_mullo_epi16(v_i16, c_91_128),
2032 )),
2033 );
2034 let b = _mm_add_epi16(
2035 y_i16,
2036 _mm_srai_epi16::<7>(_mm_mullo_epi16(u_i16, c_227_128)),
2037 );
2038
2039 let r_u8 = _mm_packus_epi16(r, zero128);
2040 let g_u8 = _mm_packus_epi16(g, zero128);
2041 let b_u8 = _mm_packus_epi16(b, zero128);
2042
2043 let mut r_arr = [0u8; 8];
2044 let mut g_arr = [0u8; 8];
2045 let mut b_arr = [0u8; 8];
2046 _mm_storel_epi64(r_arr.as_mut_ptr() as *mut __m128i, r_u8);
2047 _mm_storel_epi64(g_arr.as_mut_ptr() as *mut __m128i, g_u8);
2048 _mm_storel_epi64(b_arr.as_mut_ptr() as *mut __m128i, b_u8);
2049
2050 let mut rgb_buf = [0u8; 24];
2051 for i in 0..8 {
2052 rgb_buf[i * 3] = r_arr[i];
2053 rgb_buf[i * 3 + 1] = g_arr[i];
2054 rgb_buf[i * 3 + 2] = b_arr[i];
2055 }
2056 std::ptr::copy_nonoverlapping(rgb_buf.as_ptr(), rgb_row_ptr.add(col * 3), 24);
2057
2058 col += 8;
2059 }
2060
2061 while col < width {
2063 let y_val = *y_row_ptr.add(col) as i16;
2064 let u_val = *u_row_ptr.add(col / 2) as i16 - 128;
2065 let v_val = *v_row_ptr.add(col / 2) as i16 - 128;
2066
2067 let r = y_val + ((v_val * 179) >> 7);
2068 let g = y_val - (((u_val * 44) + (v_val * 91)) >> 7);
2069 let b = y_val + ((u_val * 227) >> 7);
2070
2071 let idx = col * 3;
2072 *rgb_row_ptr.add(idx) = r.clamp(0, 255) as u8;
2073 *rgb_row_ptr.add(idx + 1) = g.clamp(0, 255) as u8;
2074 *rgb_row_ptr.add(idx + 2) = b.clamp(0, 255) as u8;
2075
2076 col += 1;
2077 }
2078 }
2079}
2080
2081#[cfg(target_arch = "x86_64")]
2084#[target_feature(enable = "sse2")]
2085#[allow(unsafe_code, unsafe_op_in_unsafe_fn)]
2086unsafe fn yuv420_to_rgb8_rows_sse2(
2087 y_plane: &[u8],
2088 u_plane: &[u8],
2089 v_plane: &[u8],
2090 rgb_out: &mut [u8],
2091 width: usize,
2092 uv_stride: usize,
2093 start_row: usize,
2094 end_row: usize,
2095) {
2096 use std::arch::x86_64::*;
2097
2098 let c_179 = _mm_set1_epi16(179); let c_44 = _mm_set1_epi16(44); let c_91 = _mm_set1_epi16(91); let c_227 = _mm_set1_epi16(227); let c_128 = _mm_set1_epi16(128);
2104 let zero = _mm_setzero_si128();
2105
2106 for row in start_row..end_row {
2107 let out_row = row - start_row;
2108 let y_row_ptr = y_plane.as_ptr().add(row * width);
2109 let uv_row = (row / 2) * uv_stride;
2110 let u_row_ptr = u_plane.as_ptr().add(uv_row);
2111 let v_row_ptr = v_plane.as_ptr().add(uv_row);
2112 let rgb_row_ptr = rgb_out.as_mut_ptr().add(out_row * width * 3);
2113
2114 let mut col = 0usize;
2115
2116 while col + 8 <= width {
2118 let y8 = _mm_loadl_epi64(y_row_ptr.add(col) as *const __m128i);
2120 let y_i16 = _mm_unpacklo_epi8(y8, zero);
2121
2122 let mut u_buf = [0u8; 8];
2124 let mut v_buf = [0u8; 8];
2125 for i in 0..4 {
2126 u_buf[i * 2] = *u_row_ptr.add(col / 2 + i);
2127 u_buf[i * 2 + 1] = *u_row_ptr.add(col / 2 + i);
2128 v_buf[i * 2] = *v_row_ptr.add(col / 2 + i);
2129 v_buf[i * 2 + 1] = *v_row_ptr.add(col / 2 + i);
2130 }
2131 let u8_dup = _mm_loadl_epi64(u_buf.as_ptr() as *const __m128i);
2132 let v8_dup = _mm_loadl_epi64(v_buf.as_ptr() as *const __m128i);
2133
2134 let u_i16 = _mm_sub_epi16(_mm_unpacklo_epi8(u8_dup, zero), c_128);
2135 let v_i16 = _mm_sub_epi16(_mm_unpacklo_epi8(v8_dup, zero), c_128);
2136
2137 let r = _mm_add_epi16(y_i16, _mm_srai_epi16::<7>(_mm_mullo_epi16(v_i16, c_179)));
2139 let g = _mm_sub_epi16(
2141 y_i16,
2142 _mm_srai_epi16::<7>(_mm_add_epi16(
2143 _mm_mullo_epi16(u_i16, c_44),
2144 _mm_mullo_epi16(v_i16, c_91),
2145 )),
2146 );
2147 let b = _mm_add_epi16(y_i16, _mm_srai_epi16::<7>(_mm_mullo_epi16(u_i16, c_227)));
2149
2150 let r_u8 = _mm_packus_epi16(r, zero); let g_u8 = _mm_packus_epi16(g, zero);
2153 let b_u8 = _mm_packus_epi16(b, zero);
2154
2155 let mut rgb_buf = [0u8; 24];
2157 let mut r_arr = [0u8; 8];
2158 let mut g_arr = [0u8; 8];
2159 let mut b_arr = [0u8; 8];
2160 _mm_storel_epi64(r_arr.as_mut_ptr() as *mut __m128i, r_u8);
2161 _mm_storel_epi64(g_arr.as_mut_ptr() as *mut __m128i, g_u8);
2162 _mm_storel_epi64(b_arr.as_mut_ptr() as *mut __m128i, b_u8);
2163 for i in 0..8 {
2164 rgb_buf[i * 3] = r_arr[i];
2165 rgb_buf[i * 3 + 1] = g_arr[i];
2166 rgb_buf[i * 3 + 2] = b_arr[i];
2167 }
2168 std::ptr::copy_nonoverlapping(rgb_buf.as_ptr(), rgb_row_ptr.add(col * 3), 24);
2169
2170 col += 8;
2171 }
2172
2173 while col < width {
2175 let y_val = *y_row_ptr.add(col) as i16;
2176 let u_val = *u_row_ptr.add(col / 2) as i16 - 128;
2177 let v_val = *v_row_ptr.add(col / 2) as i16 - 128;
2178
2179 let r = y_val + ((v_val * 179) >> 7);
2180 let g = y_val - (((u_val * 44) + (v_val * 91)) >> 7);
2181 let b = y_val + ((u_val * 227) >> 7);
2182
2183 let idx = col * 3;
2184 *rgb_row_ptr.add(idx) = r.clamp(0, 255) as u8;
2185 *rgb_row_ptr.add(idx + 1) = g.clamp(0, 255) as u8;
2186 *rgb_row_ptr.add(idx + 2) = b.clamp(0, 255) as u8;
2187
2188 col += 1;
2189 }
2190 }
2191}
2192
2193#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2199pub enum HevcNalUnitType {
2200 TrailN,
2201 TrailR,
2202 TsaN,
2203 TsaR,
2204 StsaN,
2205 StsaR,
2206 RadlN,
2207 RadlR,
2208 RaslN,
2209 RaslR,
2210 BlaWLp,
2211 BlaWRadl,
2212 BlaNLp,
2213 IdrWRadl,
2214 IdrNLp,
2215 CraNut,
2216 VpsNut,
2217 SpsNut,
2218 PpsNut,
2219 AudNut,
2220 EosNut,
2221 EobNut,
2222 FdNut,
2223 PrefixSeiNut,
2224 SuffixSeiNut,
2225 Other(u8),
2226}
2227
2228impl HevcNalUnitType {
2229 pub fn from_header(header: &[u8]) -> Self {
2233 if header.is_empty() {
2234 return Self::Other(0);
2235 }
2236 let nal_type = (header[0] >> 1) & 0x3F;
2237 Self::from_type_byte(nal_type)
2238 }
2239
2240 fn from_type_byte(t: u8) -> Self {
2241 match t {
2242 0 => Self::TrailN,
2243 1 => Self::TrailR,
2244 2 => Self::TsaN,
2245 3 => Self::TsaR,
2246 4 => Self::StsaN,
2247 5 => Self::StsaR,
2248 6 => Self::RadlN,
2249 7 => Self::RadlR,
2250 8 => Self::RaslN,
2251 9 => Self::RaslR,
2252 16 => Self::BlaWLp,
2253 17 => Self::BlaWRadl,
2254 18 => Self::BlaNLp,
2255 19 => Self::IdrWRadl,
2256 20 => Self::IdrNLp,
2257 21 => Self::CraNut,
2258 32 => Self::VpsNut,
2259 33 => Self::SpsNut,
2260 34 => Self::PpsNut,
2261 35 => Self::AudNut,
2262 36 => Self::EosNut,
2263 37 => Self::EobNut,
2264 38 => Self::FdNut,
2265 39 => Self::PrefixSeiNut,
2266 40 => Self::SuffixSeiNut,
2267 other => Self::Other(other),
2268 }
2269 }
2270
2271 pub fn is_vcl(&self) -> bool {
2273 matches!(
2274 self,
2275 Self::TrailN
2276 | Self::TrailR
2277 | Self::TsaN
2278 | Self::TsaR
2279 | Self::StsaN
2280 | Self::StsaR
2281 | Self::RadlN
2282 | Self::RadlR
2283 | Self::RaslN
2284 | Self::RaslR
2285 | Self::BlaWLp
2286 | Self::BlaWRadl
2287 | Self::BlaNLp
2288 | Self::IdrWRadl
2289 | Self::IdrNLp
2290 | Self::CraNut
2291 )
2292 }
2293
2294 pub fn is_idr(&self) -> bool {
2296 matches!(self, Self::IdrWRadl | Self::IdrNLp)
2297 }
2298}
2299
2300#[cfg(test)]
2301mod tests {
2302 use super::*;
2303
2304 #[test]
2305 fn bitstream_reader_reads_bits() {
2306 let data = [0b10110100, 0b01100000];
2307 let mut r = BitstreamReader::new(&data);
2308 assert_eq!(r.read_bit().unwrap(), 1);
2309 assert_eq!(r.read_bit().unwrap(), 0);
2310 assert_eq!(r.read_bit().unwrap(), 1);
2311 assert_eq!(r.read_bit().unwrap(), 1);
2312 assert_eq!(r.read_bit().unwrap(), 0);
2313 assert_eq!(r.read_bit().unwrap(), 1);
2314 assert_eq!(r.read_bits(4).unwrap(), 0b0001); }
2316
2317 #[test]
2318 fn bitstream_reader_exp_golomb() {
2319 let data = [0b10000000];
2321 let mut r = BitstreamReader::new(&data);
2322 assert_eq!(r.read_ue().unwrap(), 0);
2323
2324 let data = [0b01000000];
2326 let mut r = BitstreamReader::new(&data);
2327 assert_eq!(r.read_ue().unwrap(), 1);
2328
2329 let data = [0b01100000];
2331 let mut r = BitstreamReader::new(&data);
2332 assert_eq!(r.read_ue().unwrap(), 2);
2333
2334 let data = [0b00100000];
2336 let mut r = BitstreamReader::new(&data);
2337 assert_eq!(r.read_ue().unwrap(), 3);
2338 }
2339
2340 #[test]
2341 fn bitstream_reader_signed_exp_golomb() {
2342 let data = [0b10000000];
2344 let mut r = BitstreamReader::new(&data);
2345 assert_eq!(r.read_se().unwrap(), 0);
2346
2347 let data = [0b01000000];
2349 let mut r = BitstreamReader::new(&data);
2350 assert_eq!(r.read_se().unwrap(), 1);
2351
2352 let data = [0b01100000];
2354 let mut r = BitstreamReader::new(&data);
2355 assert_eq!(r.read_se().unwrap(), -1);
2356 }
2357
2358 #[test]
2359 fn emulation_prevention_removal() {
2360 let input = [0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x01];
2361 let result = remove_emulation_prevention(&input);
2362 assert_eq!(result, [0x00, 0x00, 0x00, 0x00, 0x01]);
2363 }
2364
2365 #[test]
2366 fn yuv420_to_rgb8_pure_white() {
2367 let w = 4;
2369 let h = 4;
2370 let y = vec![235u8; w * h];
2371 let u = vec![128u8; (w / 2) * (h / 2)];
2372 let v = vec![128u8; (w / 2) * (h / 2)];
2373
2374 let rgb = yuv420_to_rgb8(&y, &u, &v, w, h).unwrap();
2375 assert_eq!(rgb.len(), w * h * 3);
2376
2377 for i in 0..(w * h) {
2379 let r = rgb[i * 3];
2380 let g = rgb[i * 3 + 1];
2381 let b = rgb[i * 3 + 2];
2382 assert!((r as i32 - 235).abs() <= 1, "R={r}");
2383 assert!((g as i32 - 235).abs() <= 1, "G={g}");
2384 assert!((b as i32 - 235).abs() <= 1, "B={b}");
2385 }
2386 }
2387
2388 #[test]
2389 fn yuv420_to_rgb8_pure_red() {
2390 let w = 2;
2392 let h = 2;
2393 let y = vec![76u8; w * h];
2394 let u = vec![84u8; (w / 2) * (h / 2)];
2395 let v = vec![255u8; (w / 2) * (h / 2)];
2396
2397 let rgb = yuv420_to_rgb8(&y, &u, &v, w, h).unwrap();
2398 let r = rgb[0];
2400 let b = rgb[2];
2401 assert!(r > 200, "R={r} should be high for red");
2402 assert!(b < 50, "B={b} should be low for red");
2403 }
2404
2405 #[test]
2406 fn hevc_nal_type_parsing() {
2407 assert_eq!(
2409 HevcNalUnitType::from_header(&[0x40, 0x01]),
2410 HevcNalUnitType::VpsNut
2411 );
2412
2413 assert_eq!(
2415 HevcNalUnitType::from_header(&[0x26, 0x01]),
2416 HevcNalUnitType::IdrWRadl
2417 );
2418
2419 assert_eq!(
2421 HevcNalUnitType::from_header(&[0x42, 0x01]),
2422 HevcNalUnitType::SpsNut
2423 );
2424
2425 let nt = HevcNalUnitType::from_header(&[0x02, 0x01]);
2427 assert_eq!(nt, HevcNalUnitType::TrailR);
2428 assert!(nt.is_vcl());
2429 assert!(!nt.is_idr());
2430 }
2431
2432 #[test]
2433 fn h264_decoder_sps_dimensions() {
2434 let mut bits = Vec::new();
2441 push_bits(&mut bits, 66, 8);
2443 push_bits(&mut bits, 0, 8);
2445 push_bits(&mut bits, 30, 8);
2447 push_exp_golomb(&mut bits, 0);
2449 push_exp_golomb(&mut bits, 0);
2451 push_exp_golomb(&mut bits, 0);
2453 push_exp_golomb(&mut bits, 0);
2455 push_exp_golomb(&mut bits, 1);
2457 push_bits(&mut bits, 0, 1);
2459 push_exp_golomb(&mut bits, 19);
2461 push_exp_golomb(&mut bits, 14);
2463 push_bits(&mut bits, 1, 1);
2465 push_bits(&mut bits, 0, 1);
2467 push_bits(&mut bits, 0, 1);
2469 push_bits(&mut bits, 0, 1);
2471
2472 let bytes = bits_to_bytes(&bits);
2473 let sps = parse_sps(&bytes).unwrap();
2474 assert_eq!(sps.profile_idc, 66);
2475 assert_eq!(sps.width(), 320);
2476 assert_eq!(sps.height(), 240);
2477 assert_eq!(sps.cropped_width(), 320);
2478 assert_eq!(sps.cropped_height(), 240);
2479 }
2480
2481 fn push_bits(bits: &mut Vec<u8>, value: u32, count: u8) {
2483 for i in (0..count).rev() {
2484 bits.push(((value >> i) & 1) as u8);
2485 }
2486 }
2487
2488 fn push_exp_golomb(bits: &mut Vec<u8>, value: u32) {
2489 if value == 0 {
2490 bits.push(1);
2491 return;
2492 }
2493 let code = value + 1;
2494 let bit_len = 32 - code.leading_zeros();
2495 let leading_zeros = bit_len - 1;
2496 for _ in 0..leading_zeros {
2497 bits.push(0);
2498 }
2499 for i in (0..bit_len).rev() {
2500 bits.push(((code >> i) & 1) as u8);
2501 }
2502 }
2503
2504 fn bits_to_bytes(bits: &[u8]) -> Vec<u8> {
2505 let mut bytes = Vec::new();
2506 for chunk in bits.chunks(8) {
2507 let mut byte = 0u8;
2508 for (i, &bit) in chunk.iter().enumerate() {
2509 byte |= bit << (7 - i);
2510 }
2511 bytes.push(byte);
2512 }
2513 bytes
2514 }
2515
2516 fn push_signed_exp_golomb(bits: &mut Vec<u8>, value: i32) {
2517 let code = if value > 0 {
2518 (2 * value - 1) as u32
2519 } else if value < 0 {
2520 (2 * (-value)) as u32
2521 } else {
2522 0
2523 };
2524 push_exp_golomb(bits, code);
2525 }
2526
2527 #[test]
2528 fn test_inverse_dct_4x4() {
2529 let mut coeffs = [0i32; 16];
2534 coeffs[0] = 64;
2535 inverse_dct_4x4(&mut coeffs);
2536 let dc_out = coeffs[0];
2538 for &c in &coeffs {
2539 assert_eq!(
2540 c, dc_out,
2541 "DC-only inverse DCT should produce uniform output"
2542 );
2543 }
2544 assert_eq!(dc_out, 1, "64 >> 6 = 1");
2545
2546 let mut coeffs2 = [0i32; 16];
2548 coeffs2[0] = 256;
2549 inverse_dct_4x4(&mut coeffs2);
2550 assert_eq!(coeffs2[0], 4, "256 >> 6 = 4");
2551 for &c in &coeffs2 {
2552 assert_eq!(c, 4);
2553 }
2554
2555 let mut coeffs3 = [0i32; 16];
2557 coeffs3[0] = 1024;
2558 coeffs3[1] = 512; coeffs3[5] = 256; inverse_dct_4x4(&mut coeffs3);
2561 let all_same = coeffs3.iter().all(|&c| c == coeffs3[0]);
2563 assert!(!all_same, "AC coefficients should break uniformity");
2564 }
2565
2566 #[test]
2567 fn test_dequant_4x4() {
2568 let mut coeffs = [1i32; 16];
2570 dequant_4x4(&mut coeffs, 0);
2571 assert_eq!(coeffs[0], 10, "pos 0, qp=0: 1*10 << 0 = 10");
2572 assert_eq!(coeffs[1], 13, "pos 1, qp=0: 1*13 << 0 = 13");
2573
2574 let mut coeffs2 = [1i32; 16];
2576 dequant_4x4(&mut coeffs2, 6);
2577 assert_eq!(coeffs2[0], 20, "pos 0, qp=6: 1*10 << 1 = 20");
2578 assert_eq!(coeffs2[1], 26, "pos 1, qp=6: 1*13 << 1 = 26");
2579
2580 let mut coeffs3 = [1i32; 16];
2582 dequant_4x4(&mut coeffs3, 12);
2583 assert_eq!(coeffs3[0], 40, "pos 0, qp=12: 1*10 << 2 = 40");
2584
2585 let mut coeffs4 = [-2i32; 16];
2587 dequant_4x4(&mut coeffs4, 0);
2588 assert_eq!(coeffs4[0], -20, "negative coeff: -2*10 = -20");
2589 }
2590
2591 #[test]
2592 fn test_h264_decoder_idr_not_all_gray() {
2593 let mut bitstream = Vec::new();
2597
2598 bitstream.extend_from_slice(&[0x00, 0x00, 0x00, 0x01]);
2601 let mut sps_bits = Vec::new();
2603 push_bits(&mut sps_bits, 66, 8);
2605 push_bits(&mut sps_bits, 0, 8);
2607 push_bits(&mut sps_bits, 30, 8);
2609 push_exp_golomb(&mut sps_bits, 0);
2611 push_exp_golomb(&mut sps_bits, 0);
2613 push_exp_golomb(&mut sps_bits, 0);
2615 push_exp_golomb(&mut sps_bits, 0);
2617 push_exp_golomb(&mut sps_bits, 0);
2619 push_bits(&mut sps_bits, 0, 1);
2621 push_exp_golomb(&mut sps_bits, 0);
2623 push_exp_golomb(&mut sps_bits, 0);
2625 push_bits(&mut sps_bits, 1, 1);
2627 push_bits(&mut sps_bits, 0, 1);
2629 push_bits(&mut sps_bits, 0, 1);
2631 push_bits(&mut sps_bits, 0, 1);
2633
2634 let sps_bytes = bits_to_bytes(&sps_bits);
2635 bitstream.push(0x67); bitstream.extend_from_slice(&sps_bytes);
2637
2638 bitstream.extend_from_slice(&[0x00, 0x00, 0x00, 0x01]);
2640 let mut pps_bits = Vec::new();
2641 push_exp_golomb(&mut pps_bits, 0);
2643 push_exp_golomb(&mut pps_bits, 0);
2645 push_bits(&mut pps_bits, 0, 1);
2647 push_bits(&mut pps_bits, 0, 1);
2649 push_exp_golomb(&mut pps_bits, 0);
2651 push_exp_golomb(&mut pps_bits, 0);
2653 push_exp_golomb(&mut pps_bits, 0);
2655 push_bits(&mut pps_bits, 0, 1);
2657 push_bits(&mut pps_bits, 0, 2);
2659 push_signed_exp_golomb(&mut pps_bits, 0);
2661
2662 let pps_bytes = bits_to_bytes(&pps_bits);
2663 bitstream.push(0x68); bitstream.extend_from_slice(&pps_bytes);
2665
2666 bitstream.extend_from_slice(&[0x00, 0x00, 0x00, 0x01]);
2668 let mut idr_bits = Vec::new();
2669 push_exp_golomb(&mut idr_bits, 0);
2672 push_exp_golomb(&mut idr_bits, 2);
2674 push_exp_golomb(&mut idr_bits, 0);
2676 push_bits(&mut idr_bits, 0, 4);
2678 push_exp_golomb(&mut idr_bits, 0);
2680 push_bits(&mut idr_bits, 0, 4);
2682 push_bits(&mut idr_bits, 0, 1);
2684 push_bits(&mut idr_bits, 0, 1);
2685 push_signed_exp_golomb(&mut idr_bits, 0);
2687
2688 push_exp_golomb(&mut idr_bits, 0);
2690
2691 for _ in 0..16 {
2693 push_bits(&mut idr_bits, 1, 1); }
2695 push_exp_golomb(&mut idr_bits, 0);
2697 push_exp_golomb(&mut idr_bits, 3);
2699
2700 while idr_bits.len() % 8 != 0 {
2702 idr_bits.push(0);
2703 }
2704
2705 let idr_bytes = bits_to_bytes(&idr_bits);
2706 bitstream.push(0x65); bitstream.extend_from_slice(&idr_bytes);
2708
2709 let mut decoder = H264Decoder::new();
2711 let result = decoder.decode(&bitstream, 0);
2712
2713 assert!(
2715 result.is_ok(),
2716 "Decoder should not error: {:?}",
2717 result.err()
2718 );
2719 let frame = result.unwrap();
2720 assert!(
2721 frame.is_some(),
2722 "Decoder should produce a frame from SPS+PPS+IDR"
2723 );
2724
2725 let frame = frame.unwrap();
2726 assert_eq!(frame.width, 16);
2727 assert_eq!(frame.height, 16);
2728 assert_eq!(frame.rgb8_data.len(), 16 * 16 * 3);
2729 assert!(frame.keyframe);
2730
2731 let all_gray = frame.rgb8_data.iter().all(|&b| b == 128);
2738 assert_eq!(frame.rgb8_data.len(), 16 * 16 * 3);
2743
2744 assert!(frame.keyframe);
2746
2747 assert!(!frame.rgb8_data.is_empty());
2753
2754 if all_gray {
2757 }
2760 }
2761}