oxideav-h261 0.0.2

Pure-Rust ITU-T H.261 video decoder for oxideav
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
//! H.261 encoder — foundation.
//!
//! The public entry point is [`encode_intra_picture`], which takes a
//! Y/Cb/Cr 4:2:0 source frame at either QCIF (176x144) or CIF (352x288)
//! and produces an elementary-stream byte buffer containing a single
//! I-picture: PSC + picture header + all GOBs + INTRA macroblocks +
//! zero-padded tail.
//!
//! The encoder is deliberately simple — no rate control, no dead-zone
//! tuning, no motion estimation. It's designed to be the minimum
//! decodable artefact so we can verify each piece of the VLC / DCT /
//! quantisation stack end-to-end against the reference decoder (either
//! our own or ffmpeg's).
//!
//! ## Picture layer (§4.2.1)
//!
//! ```text
//!   PSC (20)  0000 0000 0000 0001 0000
//!   TR  (5)   temporal reference
//!   PTYPE(6)  bit1 split, bit2 doccam, bit3 freeze-release,
//!             bit4 source format (0=QCIF, 1=CIF), bit5 HI_RES (1=off),
//!             bit6 spare (always 1 per §4.1)
//!   PEI (1)   0 — we never emit PSPARE
//! ```
//!
//! ## GOB layer (§4.2.2)
//!
//! ```text
//!   GBSC (16) 0000 0000 0000 0001
//!   GN   (4)  1..=12 (CIF) or 1,3,5 (QCIF)
//!   GQUANT(5) quantiser index 1..=31
//!   GEI  (1)  0 — we never emit GSPARE
//! ```
//!
//! ## MB layer — intra only
//!
//! For each of the 33 MBs in a GOB we emit:
//!
//! * MBA VLC — `Diff(1)` for the first coded MB, then 1-differences.
//! * MTYPE = `Intra` (4-bit `0001`). The `Intra+MQUANT` variant (7-bit
//!   `0000 001`) is not emitted yet — every MB reuses the GOB's GQUANT.
//! * CBP is absent for INTRA (all 6 blocks are always coded).
//! * 6 blocks (Y1, Y2, Y3, Y4, Cb, Cr). Each block is:
//!   * INTRA DC — 8-bit FLC per Table 6.
//!   * Zero or more AC `(run, level)` entries, each as a TCOEFF VLC
//!     prefix + sign bit, or 20-bit escape.
//!   * EOB — `10`.
//!
//! The residual path (INTER family + motion compensation) is a follow-up;
//! this module only ever produces I-pictures.

use oxideav_core::bits::BitWriter;
use oxideav_core::{Error, Result};

use crate::fdct::fdct_intra;
use crate::picture::SourceFormat;
use crate::quant::{quant_ac, quant_intra_dc};
use crate::tables::{
    encode_cbp, encode_mba_diff, lookup_tcoeff, MBA_STUFFING, MTYPE_INTRA, MTYPE_INTRA_MQUANT,
    ZIGZAG,
};

/// Default quantiser index for callers of [`encode_intra_picture`] that
/// have no preference of their own.
///
/// This is a default, not a lower bound: `encode_intra_picture` accepts
/// any QUANT in `1..=31`.
///
/// NOTE(review): the previous note here said that at QUANT=1 the DCT AC
/// dynamic range can push `|coeff|` past 2047 (beyond the level's 12-bit
/// storage) and that the value is clamped. That clamping is not visible
/// in this module — confirm against `quant_ac` before relying on it.
pub const DEFAULT_QUANT: u32 = 8;

/// Encode a single INTRA picture.
///
/// `y`, `cb`, `cr` are packed planes with the specified strides. `quant`
/// is the GOB-level QUANT (1..=31). `temporal_reference` is the 5-bit TR
/// field (mod 32) the decoder uses for lip-sync.
pub fn encode_intra_picture(
    fmt: SourceFormat,
    y: &[u8],
    y_stride: usize,
    cb: &[u8],
    cb_stride: usize,
    cr: &[u8],
    cr_stride: usize,
    quant: u32,
    temporal_reference: u8,
) -> Result<Vec<u8>> {
    if !(1..=31).contains(&quant) {
        return Err(Error::invalid(format!(
            "h261 encode: QUANT out of range: {quant}"
        )));
    }
    if temporal_reference > 31 {
        return Err(Error::invalid(format!(
            "h261 encode: TR out of range: {temporal_reference}"
        )));
    }
    let (_w, h) = fmt.dimensions();
    let h = h as usize;
    if y.len() < y_stride * h || cb.len() < cb_stride * (h / 2) || cr.len() < cr_stride * (h / 2) {
        return Err(Error::invalid("h261 encode: input plane too short"));
    }

    let mut bw = BitWriter::with_capacity(4096);
    write_picture_header(&mut bw, fmt, temporal_reference);

    for &gn in fmt.gob_numbers() {
        write_gob_header(&mut bw, gn, quant);
        let (gob_x, gob_y) = gob_origin_luma(fmt, gn);
        encode_gob_intra(
            &mut bw, y, y_stride, cb, cb_stride, cr, cr_stride, gob_x, gob_y, quant,
        );
    }

    // Pad to a byte boundary with zeros — the H.261 decoder will either
    // consume the trailing PSC (we don't emit one) or hit EOF and flush.
    bw.align_to_byte();
    Ok(bw.finish())
}

/// Write the picture-layer header (§4.2.1) to `bw`.
///
/// Fields are emitted in bitstream order, 32 bits in total:
/// PSC (20 bits), TR (5 bits), the six PTYPE flag bits and PEI (1 bit).
/// `fmt` only influences PTYPE bit 4 (0 = QCIF, 1 = CIF). `tr` is written
/// as given; this function does not range-check it, so callers are
/// expected to pass a value in `0..=31`.
pub fn write_picture_header(bw: &mut BitWriter, fmt: SourceFormat, tr: u8) {
    // Picture start code.
    bw.write_u32(0x00010, 20);
    // Temporal reference.
    bw.write_u32(u32::from(tr), 5);

    let source_format_bit: u32 = match fmt {
        SourceFormat::Cif => 1,
        SourceFormat::Qcif => 0,
    };

    // PTYPE (six single-bit flags, MSB first) followed by PEI.
    let flag_bits: [u32; 7] = [
        0,                 // bit1: split-screen indicator off
        0,                 // bit2: document-camera indicator off
        0,                 // bit3: freeze-picture release off
        source_format_bit, // bit4: source format
        1,                 // bit5: HI_RES — "1 = off" (we don't use Annex D)
        1,                 // bit6: spare — per §4.1 unused bits are set to 1
        0,                 // PEI = 0 — no PSPARE
    ];
    for bit in flag_bits {
        bw.write_u32(bit, 1);
    }
}

/// Write a GOB-layer header (§4.2.2) to `bw`.
///
/// Emits, in order: GBSC (16 bits), `gn` as GN (4 bits), `gquant` as
/// GQUANT (5 bits) and a zero GEI bit (no GSPARE follows). In debug
/// builds `gn` must be in `1..=12` and `gquant` in `1..=31`; release
/// builds do not check.
pub fn write_gob_header(bw: &mut BitWriter, gn: u8, gquant: u32) {
    debug_assert!((1..=12).contains(&gn));
    debug_assert!((1..=31).contains(&gquant));

    // (value, width in bits) for each field, in transmission order.
    let fields: [(u32, u32); 4] = [
        (0x0001, 16),       // GBSC
        (u32::from(gn), 4), // GN
        (gquant, 5),        // GQUANT
        (0, 1),             // GEI = 0 — no GSPARE
    ];
    for (value, width) in fields {
        bw.write_u32(value, width);
    }
}

fn gob_origin_luma(fmt: SourceFormat, gn: u8) -> (usize, usize) {
    match fmt {
        SourceFormat::Cif => crate::gob::cif_gob_origin_luma(gn),
        SourceFormat::Qcif => crate::gob::qcif_gob_origin_luma(gn),
    }
}

/// Encode all 33 macroblocks of one GOB as INTRA.
///
/// Macroblocks are numbered 1..=33 and laid out 11 per row, so the GOB
/// covers 3 rows of 16x16 luma pels starting at `(gob_x, gob_y)`. For
/// each macroblock this writes the MBA difference code, the INTRA MTYPE
/// code, and then the six blocks via `encode_intra_mb_blocks`. `quant` is
/// passed through unchanged to every block.
#[allow(clippy::too_many_arguments)]
fn encode_gob_intra(
    bw: &mut BitWriter,
    y: &[u8],
    y_stride: usize,
    cb: &[u8],
    cb_stride: usize,
    cr: &[u8],
    cr_stride: usize,
    gob_x: usize,
    gob_y: usize,
    quant: u32,
) {
    /// Macroblocks per GOB row.
    const MBS_PER_ROW: usize = 11;
    /// Luma pels along one macroblock edge.
    const MB_SIZE: usize = 16;

    let mut last_coded: u8 = 0;
    for mba in 1u8..=33 {
        // Every macroblock is coded, so the address difference is always 1.
        let (mba_len, mba_code) = encode_mba_diff(mba - last_coded);
        bw.write_u32(mba_code, mba_len as u32);

        // MTYPE = INTRA (4-bit 0001). GQUANT is reused for every MB, so
        // the MQUANT-carrying variant is never needed.
        bw.write_u32(MTYPE_INTRA.1, MTYPE_INTRA.0 as u32);

        // Position of this macroblock in luma pels within the picture.
        let index = usize::from(mba - 1);
        let (mb_row, mb_col) = (index / MBS_PER_ROW, index % MBS_PER_ROW);
        encode_intra_mb_blocks(
            bw,
            y,
            y_stride,
            cb,
            cb_stride,
            cr,
            cr_stride,
            gob_x + mb_col * MB_SIZE,
            gob_y + mb_row * MB_SIZE,
            quant,
        );

        last_coded = mba;
    }
}

/// Encode the six 8x8 blocks of one INTRA macroblock.
///
/// `(luma_x, luma_y)` is the macroblock's top-left corner in luma pels.
/// The four luma blocks are taken at offsets (0,0), (8,0), (0,8), (8,8)
/// from that corner, followed by one Cb and one Cr block at the halved
/// coordinates. Each block is copied out with `extract_block` and handed
/// to `encode_intra_block` with the same `quant`.
#[allow(clippy::too_many_arguments)]
fn encode_intra_mb_blocks(
    bw: &mut BitWriter,
    y: &[u8],
    y_stride: usize,
    cb: &[u8],
    cb_stride: usize,
    cr: &[u8],
    cr_stride: usize,
    luma_x: usize,
    luma_y: usize,
    quant: u32,
) {
    // One scratch buffer is enough: `extract_block` overwrites all 64
    // entries on every call.
    let mut pels = [0u8; 64];

    // Y1..Y4
    for (dx, dy) in [(0usize, 0usize), (8, 0), (0, 8), (8, 8)] {
        extract_block(y, y_stride, luma_x + dx, luma_y + dy, &mut pels);
        encode_intra_block(bw, &pels, quant);
    }

    // Cb then Cr, at chroma coordinates (luma / 2).
    let (chroma_x, chroma_y) = (luma_x / 2, luma_y / 2);
    for (plane, stride) in [(cb, cb_stride), (cr, cr_stride)] {
        extract_block(plane, stride, chroma_x, chroma_y, &mut pels);
        encode_intra_block(bw, &pels, quant);
    }
}

/// Copy the 8x8 block whose top-left sample is at column `x`, row `y` of
/// `plane` into `out` in raster order (`out[row * 8 + col]`).
///
/// `stride` is the distance between the starts of consecutive rows.
/// Samples whose linear index falls beyond the end of `plane` are written
/// as 0 rather than causing a panic.
fn extract_block(plane: &[u8], stride: usize, x: usize, y: usize, out: &mut [u8; 64]) {
    for (row, dst_row) in out.chunks_exact_mut(8).enumerate() {
        let row_start = (y + row) * stride + x;
        for (col, dst) in dst_row.iter_mut().enumerate() {
            *dst = plane.get(row_start + col).copied().unwrap_or(0);
        }
    }
}

/// Encode one 8x8 intra block: DC (8-bit FLC) + AC (TCOEFF VLCs) + EOB.
///
/// `pels` is transformed with `fdct_intra`; coefficient 0 goes through
/// `quant_intra_dc` and is written as an 8-bit field, and the remaining
/// 63 coefficients are quantised with `quant_ac` in zigzag order and
/// written as (run, level) pairs. The block always ends with the
/// two-bit EOB code `10`.
fn encode_intra_block(bw: &mut BitWriter, pels: &[u8; 64], quant: u32) {
    // Forward DCT.
    let mut coeffs = [0i32; 64];
    fdct_intra(pels, &mut coeffs);

    // DC first: raw-transform DC → FLC per Table 6.
    bw.write_u32(quant_intra_dc(coeffs[0]) as u32, 8);

    // Quantise the AC coefficients in scan order. ZIGZAG[i] is the raster
    // position of the i-th coefficient; entry 0 is the DC, already sent.
    let mut ac_levels = [0i32; 63];
    for (slot, &raster_pos) in ac_levels.iter_mut().zip(&ZIGZAG[1..64]) {
        *slot = quant_ac(coeffs[raster_pos], quant);
    }

    // Run-length pass: count zeros until the next non-zero level. Trailing
    // zeros are never emitted; the EOB covers them.
    let mut zero_run: u32 = 0;
    for level in ac_levels {
        if level != 0 {
            emit_runlevel(bw, zero_run as u8, level, /*is_first_inter=*/ false);
            zero_run = 0;
        } else {
            zero_run += 1;
        }
    }

    // End of block — always `10`.
    bw.write_u32(0b10, 2);
}

/// Emit one (run, level) TCOEFF entry.
///
/// * `run` — number of zero coefficients preceding this one; only the low
///   6 bits are written on the escape path.
/// * `level` — non-zero quantised level. It is clamped to `-127..=127`
///   before coding, because that is the range the 8-bit escape level can
///   carry (`-128` is forbidden and `0` is not a valid level).
/// * `is_first_inter` — selects the special "1s" first-coefficient code
///   for INTER blocks (not used for INTRA; Table 5 note (a): "Never used
///   in INTRA macroblocks").
///
/// Coding order: the short code for run=0, |level|=1; otherwise the VLC
/// from `lookup_tcoeff` followed by a sign bit; otherwise the 20-bit
/// escape (6-bit prefix `000001`, 6-bit run, 8-bit two's-complement level).
fn emit_runlevel(bw: &mut BitWriter, run: u8, level: i32, is_first_inter: bool) {
    debug_assert_ne!(level, 0);

    // Without this clamp a level beyond ±255 would be truncated by the
    // `as u8` below (e.g. 257 would be coded as 1), and a level in
    // 128..=255 or -255..=-128 would be written on the escape path as the
    // forbidden 0x80 or with its sign flipped.
    let level = level.clamp(-127, 127);
    let magnitude = level.unsigned_abs() as u8; // lossless: |level| <= 127
    let sign = u32::from(level < 0);

    // Special short code for run=0, abs=1: "1s" if first-in-inter, "11s" otherwise.
    if run == 0 && magnitude == 1 {
        if is_first_inter {
            bw.write_u32(1, 1); // `1`
        } else {
            bw.write_u32(0b11, 2); // `11`
        }
        bw.write_u32(sign, 1);
        return;
    }

    // Try VLC table lookup.
    if let Some((bits, code)) = lookup_tcoeff(run, magnitude) {
        bw.write_u32(code, bits as u32);
        bw.write_u32(sign, 1);
        return;
    }

    // Fallback: escape — 6-bit prefix `000001`, 6-bit run, 8-bit signed level.
    bw.write_u32(0b0000_01, 6);
    bw.write_u32(u32::from(run) & 0x3F, 6);
    // 8-bit two's complement; the clamp above rules out -128 (0x80).
    bw.write_u32((level & 0xFF) as u32, 8);
}

// Never called: it only references `encode_cbp`, `MBA_STUFFING` and
// `MTYPE_INTRA_MQUANT` so their imports do not trigger unused-import
// warnings until the CBP / MVD / mquant paths land.
#[allow(dead_code)]
fn _unused_refs() {
    let _ = (encode_cbp(1), &MBA_STUFFING, &MTYPE_INTRA_MQUANT);
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::decoder::{decode_picture_body, pic_to_video_frame, H261Decoder};
    use crate::picture::parse_picture_header;
    use oxideav_core::bits::BitReader;
    use oxideav_core::packet::PacketFlags;
    use oxideav_core::Decoder;
    use oxideav_core::{CodecId, Frame, Packet, TimeBase};

    /// Build a neutral-grey QCIF YUV420 source (Y=128, Cb=Cr=128).
    fn neutral_qcif() -> (Vec<u8>, Vec<u8>, Vec<u8>) {
        let luma = vec![128u8; 176 * 144];
        let chroma = vec![128u8; 88 * 72];
        (luma, chroma.clone(), chroma)
    }

    /// Wrap an encoded picture in a keyframe packet at pts/dts 0.
    fn keyframe_packet(data: Vec<u8>) -> Packet {
        Packet {
            stream_index: 0,
            data,
            pts: Some(0),
            dts: Some(0),
            duration: None,
            time_base: TimeBase::new(1, 30_000),
            flags: PacketFlags {
                keyframe: true,
                ..Default::default()
            },
        }
    }

    /// Push `bitstream` through a fresh `H261Decoder` as a single packet,
    /// flush to force one frame out, and return that frame.
    fn decode_one_frame(bitstream: Vec<u8>) -> Frame {
        let codec_id = CodecId::new(crate::CODEC_ID_STR);
        let mut decoder = H261Decoder::new(codec_id);
        let pkt = keyframe_packet(bitstream);
        decoder.send_packet(&pkt).expect("send");
        decoder.flush().ok();
        decoder.receive_frame().expect("frame")
    }

    /// The picture header written by the encoder parses back to the same
    /// temporal reference, source format and dimensions.
    #[test]
    fn picture_header_roundtrip() {
        let mut writer = BitWriter::new();
        write_picture_header(&mut writer, SourceFormat::Qcif, 7);
        let encoded = writer.finish();

        let mut reader = BitReader::new(&encoded);
        let hdr = parse_picture_header(&mut reader).expect("parse");
        assert_eq!(hdr.temporal_reference, 7);
        assert_eq!(hdr.source_format, SourceFormat::Qcif);
        assert_eq!(hdr.width, 176);
        assert_eq!(hdr.height, 144);
    }

    /// The GOB header written by the encoder parses back to the same GN
    /// and GQUANT.
    #[test]
    fn gob_header_roundtrip() {
        let mut writer = BitWriter::new();
        write_gob_header(&mut writer, 3, 8);
        let encoded = writer.finish();

        let mut reader = BitReader::new(&encoded);
        let hdr = crate::gob::parse_gob_header(&mut reader).expect("parse GOB");
        assert_eq!(hdr.gn, 3);
        assert_eq!(hdr.gquant, 8);
    }

    /// A flat grey QCIF picture survives encode → decode through our own
    /// decoder with at most 2 levels of error in every plane.
    #[test]
    fn encode_qcif_grey_roundtrips_through_our_decoder() {
        let (y, cb, cr) = neutral_qcif();
        let bytes = encode_intra_picture(
            SourceFormat::Qcif,
            &y,
            176,
            &cb,
            88,
            &cr,
            88,
            /*quant=*/ 8,
            /*tr=*/ 0,
        )
        .expect("encode");
        assert!(!bytes.is_empty());

        let vf = match decode_one_frame(bytes) {
            Frame::Video(v) => v,
            _ => panic!("expected video"),
        };
        assert_eq!(vf.width, 176);
        assert_eq!(vf.height, 144);

        // All Y pels should be very close to 128.
        let mut max_err = 0i32;
        for &p in &vf.planes[0].data {
            max_err = max_err.max((p as i32 - 128).abs());
        }
        assert!(max_err <= 2, "max Y error was {max_err}");

        // Chroma sanity: Cb (plane 1) then Cr (plane 2).
        for plane_index in [1usize, 2] {
            for &p in &vf.planes[plane_index].data {
                assert!((p as i32 - 128).abs() <= 2);
            }
        }
    }

    /// A flat grey CIF picture decodes through the low-level body parser
    /// (which walks all 12 GOBs) back to near-128 luma.
    #[test]
    fn encode_cif_grey_roundtrips() {
        let y = vec![128u8; 352 * 288];
        let cb = vec![128u8; 176 * 144];
        let cr = cb.clone();
        let bytes = encode_intra_picture(SourceFormat::Cif, &y, 352, &cb, 176, &cr, 176, 8, 0)
            .expect("encode cif");
        assert!(!bytes.is_empty());

        let mut reader = BitReader::new(&bytes);
        let hdr = parse_picture_header(&mut reader).expect("pic header");
        let pic = decode_picture_body(&mut reader, &hdr, &bytes, None).expect("body");
        let vf = pic_to_video_frame(&pic, Some(0), TimeBase::new(1, 30_000));
        assert_eq!(vf.width, 352);
        assert_eq!(vf.height, 288);
        for &p in &vf.planes[0].data {
            assert!((p as i32 - 128).abs() <= 2, "Y pel {p} too far from 128");
        }
    }

    /// A horizontal luma ramp decodes to values within a loose
    /// quantisation tolerance at a few sample columns.
    #[test]
    fn encode_qcif_gradient_plausible_decode() {
        let w = 176usize;
        let h = 144usize;
        // Horizontal Y gradient 32..=224 across the 176 columns, identical
        // on every row.
        let ramp: Vec<u8> = (0..w * h)
            .map(|idx| (32 + ((idx % w) * 192) / w) as u8)
            .collect();
        let cb = vec![128u8; (w / 2) * (h / 2)];
        let cr = vec![128u8; (w / 2) * (h / 2)];
        let bytes =
            encode_intra_picture(SourceFormat::Qcif, &ramp, w, &cb, w / 2, &cr, w / 2, 8, 0)
                .expect("encode gradient");

        let vf = match decode_one_frame(bytes) {
            Frame::Video(v) => v,
            _ => panic!("video"),
        };

        // Just check a few sample points are within a reasonable
        // quantisation error.
        let decoded_luma = &vf.planes[0].data;
        let sample = |x: usize, yy: usize| decoded_luma[yy * w + x] as i32;
        let expected = |x: usize| 32 + (x * 192) as i32 / w as i32;
        // Pick centre of a few macroblocks.
        for &x in &[24usize, 80, 152] {
            let got = sample(x, 72);
            let want = expected(x);
            assert!(
                (got - want).abs() <= 40,
                "gradient at x={x}: got {got}, want ~{want}"
            );
        }
    }
}