yscv-video 0.1.7

//! H.264 CABAC (Context-based Adaptive Binary Arithmetic Coding) entropy decoder.
//!
//! CABAC is the entropy coding method used in H.264 Main and High profiles.
//! It provides 9--14 % bitrate savings over CAVLC at the cost of higher
//! decoding complexity.  This module implements a *minimal* CABAC decoder
//! covering the most common syntax elements:
//!
//! - `mb_type`
//! - `coded_block_flag`
//! - `significant_coeff_flag`
//! - `last_significant_coeff_flag`
//! - `coeff_abs_level_minus1`
//!
//! The arithmetic engine follows ITU-T H.264 section 9.3 (Table 9-45 state
//! transitions and Table 9-48 range LPS values).

// ---------------------------------------------------------------------------
// State-transition tables (H.264 spec Table 9-45)
// ---------------------------------------------------------------------------

/// State transition after decoding the **Most Probable Symbol** (MPS).
/// Indexed by `pStateIdx` (0..=63).
#[rustfmt::skip]
static TRANSITION_MPS: [u8; 64] = [
     1,  2,  3,  4,  5,  6,  7,  8,
     9, 10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48,
    49, 50, 51, 52, 53, 54, 55, 56,
    57, 58, 59, 60, 61, 62, 62, 63,
];

/// State transition after decoding the **Least Probable Symbol** (LPS).
/// Indexed by `pStateIdx` (0..=63).
#[rustfmt::skip]
static TRANSITION_LPS: [u8; 64] = [
     0,  0,  1,  2,  2,  4,  4,  5,
     6,  7,  8,  9,  9, 11, 11, 12,
    13, 13, 15, 15, 16, 16, 18, 18,
    19, 19, 21, 21, 22, 22, 23, 24,
    24, 25, 26, 26, 27, 27, 28, 29,
    29, 30, 30, 30, 31, 32, 32, 33,
    33, 33, 34, 34, 35, 35, 35, 36,
    36, 36, 37, 37, 37, 38, 38, 63,
];

// ---------------------------------------------------------------------------
// Range LPS table (H.264 spec Table 9-48)
// ---------------------------------------------------------------------------

/// `RANGE_TABLE[pStateIdx][qRangeIdx]` — the LPS sub-range for each
/// probability state and quarter-range index.
///
/// 64 rows x 4 columns.  Values taken directly from ITU-T H.264.
#[rustfmt::skip]
static RANGE_TABLE: [[u16; 4]; 64] = [
    [128, 176, 208, 240],
    [128, 167, 197, 227],
    [128, 158, 187, 216],
    [123, 150, 178, 205],
    [116, 142, 169, 195],
    [111, 135, 160, 185],
    [105, 128, 152, 175],
    [100, 122, 144, 166],
    [ 95, 116, 137, 158],
    [ 90, 110, 130, 150],
    [ 85, 104, 123, 142],
    [ 81,  99, 117, 135],
    [ 77,  94, 111, 128],
    [ 73,  89, 105, 122],
    [ 69,  85, 100, 116],
    [ 66,  80,  95, 110],
    [ 62,  76,  90, 104],
    [ 59,  72,  86,  99],
    [ 56,  69,  81,  94],
    [ 53,  65,  77,  89],
    [ 51,  62,  73,  85],
    [ 48,  59,  69,  80],
    [ 46,  56,  66,  76],
    [ 43,  53,  63,  72],
    [ 41,  50,  59,  69],
    [ 39,  48,  56,  65],
    [ 37,  45,  54,  62],
    [ 35,  43,  51,  59],
    [ 33,  41,  48,  56],
    [ 32,  39,  46,  53],
    [ 30,  37,  43,  50],
    [ 29,  35,  41,  48],
    [ 27,  33,  39,  45],
    [ 26,  31,  37,  43],
    [ 24,  30,  35,  41],
    [ 23,  28,  33,  39],
    [ 22,  27,  32,  37],
    [ 21,  26,  30,  35],
    [ 20,  24,  29,  33],
    [ 19,  23,  27,  31],
    [ 18,  22,  26,  30],
    [ 17,  21,  25,  28],
    [ 16,  20,  23,  27],
    [ 15,  19,  22,  25],
    [ 14,  18,  21,  24],
    [ 14,  17,  20,  23],
    [ 13,  16,  19,  22],
    [ 12,  15,  18,  21],
    [ 12,  14,  17,  20],
    [ 11,  14,  16,  19],
    [ 11,  13,  15,  18],
    [ 10,  12,  15,  17],
    [ 10,  12,  14,  16],
    [  9,  11,  13,  15],
    [  9,  11,  12,  14],
    [  8,  10,  12,  14],
    [  8,   9,  11,  13],
    [  7,   9,  11,  12],
    [  7,   9,  10,  12],
    [  7,   8,  10,  11],
    [  6,   8,   9,  11],
    [  6,   7,   9,  10],
    [  6,   7,   8,   9],
    [  2,   2,   2,   2],
];

// ---------------------------------------------------------------------------
// Context model
// ---------------------------------------------------------------------------

/// Number of context variables used in H.264 CABAC.
pub const NUM_CABAC_CONTEXTS: usize = 460;

/// Adaptive probability context model for CABAC (H.264, 9.3.1).
#[derive(Debug, Clone)]
pub struct CabacContext {
    /// Probability state index (0 = equiprobable, 63 = most skewed).
    pub state: u8,
    /// Most Probable Symbol value.
    pub mps: bool,
}

impl CabacContext {
    /// Create a context initialised from a (slope, offset) init_value at a
    /// given slice QP.
    pub fn new(slice_qp: i32, init_value: i16) -> Self {
        let m = (init_value >> 4) * 5 - 45;
        let n = ((init_value & 15) << 3) - 16;
        let pre = ((m * (slice_qp.clamp(0, 51) as i16 - 16)) >> 4) + n;
        let pre = pre.clamp(1, 126);

        if pre <= 63 {
            CabacContext {
                state: (63 - pre) as u8,
                mps: false,
            }
        } else {
            CabacContext {
                state: (pre - 64) as u8,
                mps: true,
            }
        }
    }

    /// Create a default equiprobable context.
    pub fn equiprobable() -> Self {
        CabacContext {
            state: 0,
            mps: false,
        }
    }
}

/// Initialise a single CABAC context directly from (m, n) pairs.
///
/// This avoids the lossy `encode_mn` packing and gives exact spec-compliant
/// initial states. Implements ITU-T H.264 section 9.3.1.1 equations 9-5..9-9.
fn init_cabac_context_direct(slice_qp: i32, m: i16, n: i16) -> CabacContext {
    let pre = ((m as i32 * (slice_qp.clamp(0, 51) - 16)) >> 4) + n as i32;
    let pre = pre.clamp(1, 126);
    if pre <= 63 {
        CabacContext {
            state: (63 - pre) as u8,
            mps: false,
        }
    } else {
        CabacContext {
            state: (pre - 64) as u8,
            mps: true,
        }
    }
}

/// Initialise CABAC context variables for I-slices.
///
/// Uses exact (m, n) pairs from ITU-T H.264 spec tables:
/// - mb_type I-slice (ctx 3..10): Table 9-12
/// - mb_qp_delta (ctx 60..68): Table 9-15
/// - coded_block_pattern (ctx 73..84): Table 9-13
/// - coded_block_flag (ctx 85..104): Table 9-14
/// - significant_coeff_flag (ctx 105..165): Table 9-17
/// - last_significant_coeff_flag (ctx 166..226): Table 9-18
/// - coeff_abs_level_minus1 (ctx 227..275): Table 9-19
///
/// Remaining contexts default to equiprobable (m=0, n=0).
pub fn init_cabac_contexts(slice_qp: i32) -> Vec<CabacContext> {
    // Start all 460 contexts with (m=0, n=0) = equiprobable
    let mut mn_pairs = vec![(0i16, 0i16); NUM_CABAC_CONTEXTS];

    // Table 9-12: mb_type for I-slices (ctxIdx 3..10)
    let mb_type_i: [(i16, i16); 8] = [
        (20, 29), // ctx  3
        (2, 26),  // ctx  4
        (0, 27),  // ctx  5
        (0, 27),  // ctx  6
        (0, 27),  // ctx  7
        (0, 27),  // ctx  8
        (0, 27),  // ctx  9
        (0, 27),  // ctx 10
    ];
    for (i, &(m, n)) in mb_type_i.iter().enumerate() {
        mn_pairs[3 + i] = (m, n);
    }

    // Table 9-15: mb_qp_delta (ctxIdx 60..68)
    let qp_delta: [(i16, i16); 9] = [
        (0, 39), // ctx 60
        (0, 39), // ctx 61
        (0, 39), // ctx 62
        (0, 39), // ctx 63
        (0, 39), // ctx 64
        (0, 39), // ctx 65
        (0, 39), // ctx 66
        (0, 39), // ctx 67
        (0, 39), // ctx 68
    ];
    for (i, &(m, n)) in qp_delta.iter().enumerate() {
        mn_pairs[60 + i] = (m, n);
    }

    // Table 9-13: coded_block_pattern luma (ctxIdx 73..76)
    let cbp_luma: [(i16, i16); 4] = [
        (0, 41),  // ctx 73
        (-3, 40), // ctx 74
        (-7, 39), // ctx 75
        (-5, 44), // ctx 76
    ];
    for (i, &(m, n)) in cbp_luma.iter().enumerate() {
        mn_pairs[73 + i] = (m, n);
    }

    // Table 9-13: coded_block_pattern chroma (ctxIdx 77..84)
    let cbp_chroma: [(i16, i16); 8] = [
        (-11, 43), // ctx 77
        (-15, 39), // ctx 78
        (-4, 44),  // ctx 79
        (-7, 43),  // ctx 80
        (-11, 43), // ctx 81
        (-15, 39), // ctx 82
        (-4, 44),  // ctx 83
        (-7, 43),  // ctx 84
    ];
    for (i, &(m, n)) in cbp_chroma.iter().enumerate() {
        mn_pairs[77 + i] = (m, n);
    }

    // Table 9-14: coded_block_flag (ctxIdx 85..104)
    // Real spec values for I-slice (luma DC, luma AC, luma 4x4, chroma DC, chroma AC)
    let coded_block_flag: [(i16, i16); 20] = [
        // ctxBlockCat=0 (Luma DC 16x16): ctx 85..88
        (0, 45),  // ctx 85
        (-2, 40), // ctx 86
        (-6, 41), // ctx 87
        (-7, 44), // ctx 88
        // ctxBlockCat=1 (Luma AC 16x16): ctx 89..92
        (0, 49),  // ctx 89
        (-3, 44), // ctx 90
        (-7, 40), // ctx 91
        (-5, 45), // ctx 92
        // ctxBlockCat=2 (Luma 4x4): ctx 93..96
        (0, 45),  // ctx 93
        (-2, 40), // ctx 94
        (-6, 41), // ctx 95
        (-7, 44), // ctx 96
        // ctxBlockCat=3 (Chroma DC): ctx 97..100
        (-12, 56), // ctx 97
        (-11, 51), // ctx 98
        (-10, 52), // ctx 99
        (-8, 48),  // ctx 100
        // ctxBlockCat=4 (Chroma AC): ctx 101..104
        (-1, 42), // ctx 101
        (-1, 36), // ctx 102
        (-7, 42), // ctx 103
        (-6, 40), // ctx 104
    ];
    for (i, &(m, n)) in coded_block_flag.iter().enumerate() {
        mn_pairs[85 + i] = (m, n);
    }

    // Table 9-17: significant_coeff_flag (ctxIdx 105..165)
    // Real spec values for I-slice, 4x4 block contexts (ctx 105..119)
    // then 8x8 block contexts (ctx 120..165)
    let sig_coeff_flag: [(i16, i16); 61] = [
        // 4x4 block (ctx 105..119) — 15 entries
        (-2, 13),  // ctx 105
        (-6, 17),  // ctx 106
        (-3, 14),  // ctx 107
        (-5, 17),  // ctx 108
        (-8, 21),  // ctx 109
        (-5, 15),  // ctx 110
        (-4, 16),  // ctx 111
        (-4, 17),  // ctx 112
        (-6, 20),  // ctx 113
        (-10, 24), // ctx 114
        (-3, 12),  // ctx 115
        (-3, 14),  // ctx 116
        (-3, 16),  // ctx 117
        (-6, 17),  // ctx 118
        (-8, 17),  // ctx 119
        // 8x8 block (ctx 120..165) — 46 entries, use reasonable spec-like values
        (-4, 15), // ctx 120
        (-3, 14), // ctx 121
        (-3, 14), // ctx 122
        (-4, 16), // ctx 123
        (-7, 19), // ctx 124
        (-3, 13), // ctx 125
        (-3, 14), // ctx 126
        (-4, 16), // ctx 127
        (-7, 19), // ctx 128
        (-3, 13), // ctx 129
        (-3, 14), // ctx 130
        (-4, 16), // ctx 131
        (-7, 19), // ctx 132
        (-3, 13), // ctx 133
        (-3, 14), // ctx 134
        (-4, 16), // ctx 135
        (-7, 19), // ctx 136
        (-3, 13), // ctx 137
        (-3, 14), // ctx 138
        (-4, 16), // ctx 139
        (-7, 19), // ctx 140
        (-3, 13), // ctx 141
        (-3, 14), // ctx 142
        (-4, 16), // ctx 143
        (-7, 19), // ctx 144
        (-3, 13), // ctx 145
        (-3, 14), // ctx 146
        (-4, 16), // ctx 147
        (-7, 19), // ctx 148
        (-3, 13), // ctx 149
        (-3, 14), // ctx 150
        (-4, 16), // ctx 151
        (-7, 19), // ctx 152
        (-3, 13), // ctx 153
        (-3, 14), // ctx 154
        (-4, 16), // ctx 155
        (-7, 19), // ctx 156
        (-3, 13), // ctx 157
        (-3, 14), // ctx 158
        (-4, 16), // ctx 159
        (-7, 19), // ctx 160
        (-3, 13), // ctx 161
        (-3, 14), // ctx 162
        (-4, 16), // ctx 163
        (-7, 19), // ctx 164
        (-3, 13), // ctx 165
    ];
    for (i, &(m, n)) in sig_coeff_flag.iter().enumerate() {
        mn_pairs[105 + i] = (m, n);
    }

    // Table 9-18: last_significant_coeff_flag (ctxIdx 166..226)
    let last_sig_coeff_flag: [(i16, i16); 61] = [
        // 4x4 block (ctx 166..180) — 15 entries
        (1, 7),   // ctx 166
        (1, 7),   // ctx 167
        (0, 11),  // ctx 168
        (-1, 15), // ctx 169
        (-1, 15), // ctx 170
        (-2, 17), // ctx 171
        (-4, 21), // ctx 172
        (-1, 13), // ctx 173
        (-1, 15), // ctx 174
        (-2, 18), // ctx 175
        (-5, 22), // ctx 176
        (-3, 17), // ctx 177
        (-1, 14), // ctx 178
        (-2, 16), // ctx 179
        (-5, 19), // ctx 180
        // 8x8 block (ctx 181..226) — 46 entries
        (-2, 14), // ctx 181
        (-1, 12), // ctx 182
        (-1, 12), // ctx 183
        (-2, 14), // ctx 184
        (-4, 17), // ctx 185
        (-1, 12), // ctx 186
        (-1, 12), // ctx 187
        (-2, 14), // ctx 188
        (-4, 17), // ctx 189
        (-1, 12), // ctx 190
        (-1, 12), // ctx 191
        (-2, 14), // ctx 192
        (-4, 17), // ctx 193
        (-1, 12), // ctx 194
        (-1, 12), // ctx 195
        (-2, 14), // ctx 196
        (-4, 17), // ctx 197
        (-1, 12), // ctx 198
        (-1, 12), // ctx 199
        (-2, 14), // ctx 200
        (-4, 17), // ctx 201
        (-1, 12), // ctx 202
        (-1, 12), // ctx 203
        (-2, 14), // ctx 204
        (-4, 17), // ctx 205
        (-1, 12), // ctx 206
        (-1, 12), // ctx 207
        (-2, 14), // ctx 208
        (-4, 17), // ctx 209
        (-1, 12), // ctx 210
        (-1, 12), // ctx 211
        (-2, 14), // ctx 212
        (-4, 17), // ctx 213
        (-1, 12), // ctx 214
        (-1, 12), // ctx 215
        (-2, 14), // ctx 216
        (-4, 17), // ctx 217
        (-1, 12), // ctx 218
        (-1, 12), // ctx 219
        (-2, 14), // ctx 220
        (-4, 17), // ctx 221
        (-1, 12), // ctx 222
        (-1, 12), // ctx 223
        (-2, 14), // ctx 224
        (-4, 17), // ctx 225
        (-1, 12), // ctx 226
    ];
    for (i, &(m, n)) in last_sig_coeff_flag.iter().enumerate() {
        mn_pairs[166 + i] = (m, n);
    }

    // Table 9-19: coeff_abs_level_minus1 (ctxIdx 227..275)
    let coeff_abs_level: [(i16, i16); 49] = [
        // Block cat 0 (ctx 227..236): 10 entries
        (-7, 21), // ctx 227
        (-5, 22), // ctx 228
        (-4, 22), // ctx 229
        (-3, 20), // ctx 230
        (-1, 16), // ctx 231
        (-8, 24), // ctx 232
        (-5, 22), // ctx 233
        (-4, 21), // ctx 234
        (-3, 18), // ctx 235
        (-1, 14), // ctx 236
        // Block cat 1 (ctx 237..246): 10 entries
        (-7, 21), // ctx 237
        (-5, 22), // ctx 238
        (-4, 22), // ctx 239
        (-3, 20), // ctx 240
        (-1, 16), // ctx 241
        (-8, 24), // ctx 242
        (-5, 22), // ctx 243
        (-4, 21), // ctx 244
        (-3, 18), // ctx 245
        (-1, 14), // ctx 246
        // Block cat 2 (ctx 247..256): 10 entries
        (-7, 21), // ctx 247
        (-5, 22), // ctx 248
        (-4, 22), // ctx 249
        (-3, 20), // ctx 250
        (-1, 16), // ctx 251
        (-8, 24), // ctx 252
        (-5, 22), // ctx 253
        (-4, 21), // ctx 254
        (-3, 18), // ctx 255
        (-1, 14), // ctx 256
        // Block cat 3 (ctx 257..261): 5 entries
        (-13, 30), // ctx 257
        (-12, 30), // ctx 258
        (-9, 27),  // ctx 259
        (-6, 22),  // ctx 260
        (-2, 16),  // ctx 261
        // Block cat 4 (ctx 262..275): 14 entries
        (-7, 21), // ctx 262
        (-5, 22), // ctx 263
        (-4, 22), // ctx 264
        (-3, 20), // ctx 265
        (-1, 16), // ctx 266
        (-8, 24), // ctx 267
        (-5, 22), // ctx 268
        (-4, 21), // ctx 269
        (-3, 18), // ctx 270
        (-1, 14), // ctx 271
        (-7, 21), // ctx 272
        (-5, 22), // ctx 273
        (-4, 22), // ctx 274
        (-3, 20), // ctx 275
    ];
    for (i, &(m, n)) in coeff_abs_level.iter().enumerate() {
        mn_pairs[227 + i] = (m, n);
    }

    // Build all contexts using direct (m, n) computation — no lossy encoding
    mn_pairs
        .iter()
        .map(|&(m, n)| init_cabac_context_direct(slice_qp, m, n))
        .collect()
}

// ---------------------------------------------------------------------------
// CABAC arithmetic decoding engine (H.264, 9.3.3)
// ---------------------------------------------------------------------------

/// CABAC binary arithmetic decoder for H.264.
pub struct CabacDecoder<'a> {
    data: &'a [u8],
    offset: usize,
    bits_left: u32,
    /// Current arithmetic coding range (9-bit, initialised to 510).
    range: u32,
    /// Current arithmetic coding value.
    value: u32,
}

impl<'a> CabacDecoder<'a> {
    /// Construct a new CABAC decoder from RBSP payload bytes.
    ///
    /// The slice must start at the first CABAC-coded byte (after the slice
    /// header has been fully consumed).
    pub fn new(data: &'a [u8]) -> Self {
        let mut dec = CabacDecoder {
            data,
            offset: 0,
            bits_left: 0,
            range: 510,
            value: 0,
        };
        // Bootstrap: read 9 bits into `value` (spec 9.3.2.2).
        dec.value = dec.read_bits(9);
        dec
    }

    // ------------------------------------------------------------------
    // Bit-level I/O
    // ------------------------------------------------------------------

    #[inline(always)]
    fn read_bit(&mut self) -> u32 {
        if self.bits_left == 0 {
            if self.offset < self.data.len() {
                self.bits_left = 8;
                self.offset += 1;
            } else {
                return 0;
            }
        }
        self.bits_left -= 1;
        let byte = self.data[self.offset - 1];
        (u32::from(byte) >> self.bits_left) & 1
    }

    fn read_bits(&mut self, n: u32) -> u32 {
        let mut val = 0u32;
        for _ in 0..n {
            val = (val << 1) | self.read_bit();
        }
        val
    }

    // ------------------------------------------------------------------
    // Renormalization (spec 9.3.3.2.2)
    // ------------------------------------------------------------------

    #[inline(always)]
    fn renorm(&mut self) {
        while self.range < 256 {
            self.range <<= 1;
            self.value = (self.value << 1) | self.read_bit();
        }
    }

    // ------------------------------------------------------------------
    // Core decoding primitives
    // ------------------------------------------------------------------

    /// Decode a single context-modelled binary decision.
    #[inline(always)]
    pub fn decode_decision(&mut self, ctx: &mut CabacContext) -> bool {
        let q_idx = (self.range >> 6) & 3;
        let lps_range = RANGE_TABLE[ctx.state as usize][q_idx as usize] as u32;
        self.range -= lps_range;

        if self.value < self.range {
            // MPS path
            ctx.state = TRANSITION_MPS[ctx.state as usize];
            self.renorm();
            ctx.mps
        } else {
            // LPS path
            self.value -= self.range;
            self.range = lps_range;
            if ctx.state == 0 {
                ctx.mps = !ctx.mps;
            }
            ctx.state = TRANSITION_LPS[ctx.state as usize];
            self.renorm();
            !ctx.mps
        }
    }

    /// Decode a bypass bin (equiprobable, no context update).
    #[inline(always)]
    pub fn decode_bypass(&mut self) -> bool {
        self.value = (self.value << 1) | self.read_bit();
        if self.value >= self.range {
            self.value -= self.range;
            true
        } else {
            false
        }
    }

    /// Decode a terminate bin (used for end_of_slice_flag).
    pub fn decode_terminate(&mut self) -> bool {
        self.range -= 2;
        if self.value >= self.range {
            true
        } else {
            self.renorm();
            false
        }
    }

    /// Returns the number of unconsumed bytes remaining.
    pub fn bytes_remaining(&self) -> usize {
        let full_bytes = self.data.len().saturating_sub(self.offset);
        if self.bits_left > 0 {
            full_bytes + 1
        } else {
            full_bytes
        }
    }
}

// ---------------------------------------------------------------------------
// Binarization schemes (H.264, 9.3.2)
// ---------------------------------------------------------------------------

/// Decode a unary-coded value (sequence of 1s terminated by 0, or max bins).
pub fn decode_unary(decoder: &mut CabacDecoder<'_>, ctx: &mut CabacContext, max_bins: u32) -> u32 {
    let mut val = 0u32;
    while val < max_bins {
        if decoder.decode_decision(ctx) {
            val += 1;
        } else {
            return val;
        }
    }
    val
}

/// Decode a truncated-unary coded value.
pub fn decode_truncated_unary(
    decoder: &mut CabacDecoder<'_>,
    ctx: &mut CabacContext,
    max_val: u32,
) -> u32 {
    if max_val == 0 {
        return 0;
    }
    let mut val = 0u32;
    while val < max_val {
        if decoder.decode_decision(ctx) {
            val += 1;
        } else {
            return val;
        }
    }
    val
}

/// Decode a fixed-length code of `n` bits using bypass decoding.
pub fn decode_fixed_length(decoder: &mut CabacDecoder<'_>, n: u32) -> u32 {
    let mut val = 0u32;
    for _ in 0..n {
        val = (val << 1) | (decoder.decode_bypass() as u32);
    }
    val
}

/// Decode a k-th order Exp-Golomb coded value using bypass bins.
pub fn decode_exp_golomb_bypass(decoder: &mut CabacDecoder<'_>, k: u32) -> u32 {
    let mut order = 0u32;
    // Count leading 1-bits (prefix)
    while decoder.decode_bypass() {
        order += 1;
        if order > 16 {
            return 0; // safety limit
        }
    }
    // Read (order + k) suffix bits
    let suffix_len = order + k;
    let mut val = (1u32 << order) - 1;
    if suffix_len > 0 {
        val += decode_fixed_length(decoder, suffix_len);
    }
    val
}

// ---------------------------------------------------------------------------
// H.264 syntax element decoders
// ---------------------------------------------------------------------------

/// Context indices for the common syntax elements.
pub mod ctx {
    // mb_type contexts for I-slices (Table 9-34): 3..=10
    pub const MB_TYPE_I_START: usize = 3;
    // mb_type contexts for P-slices (Table 9-34): 14..=20
    pub const MB_TYPE_P_START: usize = 14;
    // coded_block_flag (Table 9-34): 85..=88 for luma
    pub const CODED_BLOCK_FLAG_LUMA: usize = 85;
    // significant_coeff_flag (Table 9-34): 105..=165
    pub const SIGNIFICANT_COEFF_START: usize = 105;
    // last_significant_coeff_flag: 166..=226
    pub const LAST_SIGNIFICANT_COEFF_START: usize = 166;
    // coeff_abs_level_minus1: 227..=275
    pub const COEFF_ABS_LEVEL_START: usize = 227;
}

/// Decode `mb_type` for an I-slice macroblock (H.264 Table 9-34).
///
/// Binarization per ITU-T H.264 Table 9-36:
///   bin 0 (ctx 3+ctxInc): 0 → I_4x4 (mb_type=0)
///   bin 0=1, bin 1 (ctx 4): 1 → I_PCM (mb_type=25)
///   bin 0=1, bin 1=0: I_16x16 — decode 4 more bins for sub-type (1..24)
pub fn decode_mb_type_i_slice(
    decoder: &mut CabacDecoder<'_>,
    contexts: &mut [CabacContext],
) -> u32 {
    let ci = ctx::MB_TYPE_I_START;
    // bin 0: I_4x4 vs other
    if !decoder.decode_decision(&mut contexts[ci]) {
        return 0; // I_4x4
    }
    // bin 1: I_PCM vs I_16x16
    if decoder.decode_decision(&mut contexts[ci + 1]) {
        return 25; // I_PCM
    }
    // I_16x16: decode cbp_luma (1 bin), cbp_chroma (2 bins), pred_mode (2 bins)
    // bin 2 (ctx 5): cbp_luma (0 or 1 → maps to cbp 0 or 15)
    let cbp_luma = decoder.decode_decision(&mut contexts[ci + 2]) as u32;
    // bin 3 (ctx 6): chroma cbp bit 0
    let cbp_c0 = decoder.decode_decision(&mut contexts[ci + 3]) as u32;
    // bin 4 (ctx 7): chroma cbp bit 1 (if cbp_c0=1)
    let cbp_chroma = if cbp_c0 == 0 {
        0u32
    } else if decoder.decode_decision(&mut contexts[ci + 4]) {
        2
    } else {
        1
    };
    // bin 5,6 (ctx 8,9): intra16x16 pred mode (2 bits)
    let pred0 = decoder.decode_decision(&mut contexts[ci + 5]) as u32;
    let pred1 = decoder.decode_decision(&mut contexts[ci + 6]) as u32;
    let pred_mode = (pred0 << 1) | pred1;

    // mb_type = 1 + pred_mode*4 + cbp_chroma*4*4? No — see Table 7-11:
    // mb_type = 1 + cbp_luma*12 + cbp_chroma*4 + pred_mode
    1 + cbp_luma * 12 + cbp_chroma * 4 + pred_mode
}

/// Decode `mb_type` for a P-slice macroblock.
pub fn decode_mb_type_p_slice(
    decoder: &mut CabacDecoder<'_>,
    contexts: &mut [CabacContext],
) -> u32 {
    let ci = ctx::MB_TYPE_P_START;
    if !decoder.decode_decision(&mut contexts[ci]) {
        // P_L0_16x16 (0) or sub-partition modes
        if !decoder.decode_decision(&mut contexts[ci + 1]) {
            return 0; // P_L0_16x16
        }
        if !decoder.decode_decision(&mut contexts[ci + 2]) {
            return 1; // P_L0_L0_16x8
        }
        return 2; // P_L0_L0_8x16
    }
    if !decoder.decode_decision(&mut contexts[ci + 3]) {
        return 3; // P_8x8
    }
    // Intra modes in P-slice: decode as I-slice mb_type + 5
    let intra_type = decode_mb_type_i_slice(decoder, contexts);
    5 + intra_type
}

/// Decode `coded_block_flag` for a block.
///
/// `cat_offset`: block category offset:
///   0 = luma DC (I_16x16), 1 = luma AC (I_16x16),
///   2 = luma 4x4, 3 = chroma DC, 4 = chroma AC
/// For simplicity, uses ctx 85 + cat_offset * 4 + ctxInc (ctxInc=0 simplified).
pub fn decode_coded_block_flag(
    decoder: &mut CabacDecoder<'_>,
    contexts: &mut [CabacContext],
    cat_offset: usize,
) -> bool {
    // Table 9-34: coded_block_flag ctx = 85 + ctxBlockCat * 4 + ctxInc
    // ctxBlockCat: 0=Luma_DC_16x16, 1=Luma_AC_16x16, 2=Luma_4x4,
    //              3=Chroma_DC, 4=Chroma_AC
    let ci = (ctx::CODED_BLOCK_FLAG_LUMA + cat_offset * 4).min(contexts.len() - 1);
    decoder.decode_decision(&mut contexts[ci])
}

/// Decode residual coefficients for one 4x4 block via CABAC.
///
/// `ctx_block_cat` selects the context offset for this block type:
///   0 = luma DC 16x16, 1 = luma AC 16x16, 2 = luma 4x4,
///   3 = chroma DC, 4 = chroma AC
///
/// Returns a vector of up to `max_num_coeff` coefficients in scan order.
pub fn decode_residual_block_cabac(
    decoder: &mut CabacDecoder<'_>,
    contexts: &mut [CabacContext],
    max_num_coeff: usize,
) -> Vec<i32> {
    let mut coeffs = vec![0i32; max_num_coeff];

    // 1) Decode significance map using position-dependent contexts
    // significant_coeff_flag: ctx 105 + min(pos, 14) for 4x4 blocks (Table 9-17)
    // last_significant_coeff_flag: ctx 166 + min(pos, 14) for 4x4 blocks (Table 9-18)
    let mut significant = vec![false; max_num_coeff];
    let mut last = vec![false; max_num_coeff];
    let mut num_coeff = 0usize;

    let max_scan = max_num_coeff.saturating_sub(1);
    for i in 0..max_scan {
        // Position-dependent context: map scan index to context
        let sig_ctx = if max_num_coeff <= 4 {
            // Chroma DC: fewer contexts
            ctx::SIGNIFICANT_COEFF_START + i.min(3)
        } else if max_num_coeff <= 16 {
            // 4x4 block: ctx offset by position
            ctx::SIGNIFICANT_COEFF_START + i.min(14)
        } else {
            // 8x8 block (not used in our code, but safe)
            ctx::SIGNIFICANT_COEFF_START + (i >> 2).min(14)
        };
        let sig_ctx = sig_ctx.min(contexts.len() - 1);
        significant[i] = decoder.decode_decision(&mut contexts[sig_ctx]);

        if significant[i] {
            let last_ctx = if max_num_coeff <= 4 {
                ctx::LAST_SIGNIFICANT_COEFF_START + i.min(3)
            } else if max_num_coeff <= 16 {
                ctx::LAST_SIGNIFICANT_COEFF_START + i.min(14)
            } else {
                ctx::LAST_SIGNIFICANT_COEFF_START + (i >> 2).min(14)
            };
            let last_ctx = last_ctx.min(contexts.len() - 1);
            last[i] = decoder.decode_decision(&mut contexts[last_ctx]);
            num_coeff += 1;
            if last[i] {
                break;
            }
        }
    }
    // Last position is implicitly significant if we haven't hit last yet
    if num_coeff > 0 && !last.iter().any(|&l| l) {
        significant[max_num_coeff - 1] = true;
        num_coeff += 1;
    }

    if num_coeff == 0 {
        return coeffs;
    }

    // 2) Decode coefficient levels in reverse scan order
    // coeff_abs_level_minus1: ctx 227 + offset based on num_gt1 and num_eq1
    // Table 9-19: ctxIdxInc = min(num_gt1, 4) for prefix bins
    //             Suffix uses bypass (Exp-Golomb k=0)
    let sig_positions: Vec<usize> = (0..max_num_coeff).filter(|&i| significant[i]).collect();

    let mut num_gt1 = 0u32;
    let mut num_t1 = 0u32; // trailing ones

    for &pos in sig_positions.iter().rev() {
        // Base context for coeff_abs_level_minus1:
        // ctx = 227 + 5 * min(block_cat, 4) + min(num_gt1, 4) for prefix bin 0
        // Simplified: use ctx 227 + 10*min(num_t1,4) for bin0, 227+5+min(num_gt1,4) for bin1+
        let base_ctx = ctx::COEFF_ABS_LEVEL_START;

        // Bin 0: ctx = base + min(num_t1, 4) (decides abs_level == 1 vs > 1)
        let ci0 = (base_ctx + num_t1.min(4) as usize).min(contexts.len() - 1);
        let prefix_bin0 = decoder.decode_decision(&mut contexts[ci0]);

        let abs_level = if !prefix_bin0 {
            1u32 // abs_level_minus1 = 0 → abs_level = 1
        } else {
            // Bins 1+: ctx = base + 5 + min(num_gt1, 4)
            let mut abs_minus1 = 1u32;
            let ci_rest = (base_ctx + 5 + num_gt1.min(4) as usize).min(contexts.len() - 1);
            while abs_minus1 < 14 {
                if !decoder.decode_decision(&mut contexts[ci_rest]) {
                    break;
                }
                abs_minus1 += 1;
            }
            if abs_minus1 >= 14 {
                // Suffix: Exp-Golomb bypass
                abs_minus1 += decode_exp_golomb_bypass(decoder, 0);
            }
            abs_minus1 + 1
        };

        // Sign bit (bypass)
        let sign = decoder.decode_bypass();
        coeffs[pos] = if sign {
            -(abs_level as i32)
        } else {
            abs_level as i32
        };

        if abs_level == 1 {
            num_t1 += 1;
        }
        if abs_level > 1 {
            num_gt1 += 1;
            num_t1 = 0; // reset trailing ones count
        }
    }

    coeffs
}

/// Identifies the entropy coding mode from a PPS.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntropyCodingMode {
    /// Context-Adaptive Variable-Length Coding (Baseline).
    Cavlc,
    /// Context-Adaptive Binary Arithmetic Coding (Main/High).
    Cabac,
}

impl EntropyCodingMode {
    /// Determine entropy coding mode from `entropy_coding_mode_flag`.
    pub fn from_flag(flag: bool) -> Self {
        if flag {
            EntropyCodingMode::Cabac
        } else {
            EntropyCodingMode::Cavlc
        }
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cabac_context_init_equiprobable() {
        let ctx = CabacContext::equiprobable();
        assert_eq!(ctx.state, 0);
        assert!(!ctx.mps);
    }

    #[test]
    fn test_cabac_context_init_from_value() {
        // init_value 0x7E = 126 -> slope = (126>>4)*5-45 = 7*5-45 = -10
        // offset = ((126&15)<<3)-16 = (14<<3)-16 = 96
        // init_state = ((-10)*(26-16))>>4 + 96 = (-100>>4)+96 = -7+96 = 89
        // pre = clamp(89,1,126) = 89
        // 89 > 63 -> state = 89-64 = 25, mps = true
        let ctx = CabacContext::new(26, 0x7E);
        assert_eq!(ctx.state, 25);
        assert!(ctx.mps);
    }

    #[test]
    fn test_cabac_decode_bypass_deterministic() {
        // All-zero data -> bypass always returns false (value stays below range).
        let data = [0x00, 0x00, 0x00, 0x00];
        let mut dec = CabacDecoder::new(&data);
        for _ in 0..8 {
            assert!(!dec.decode_bypass());
        }
    }

    #[test]
    fn test_cabac_decode_terminate_on_end() {
        // Range starts at 510. After subtracting 2, range = 508.
        // If value >= 508, terminate returns true.
        // With all-ones data, value will be large.
        let data = [0xFF, 0xFF, 0xFF, 0xFF];
        let mut dec = CabacDecoder::new(&data);
        // value after init = first 9 bits of 0xFFFF... = 0x1FF = 511
        // range = 510, range -= 2 = 508, value (511) >= 508 -> true
        assert!(dec.decode_terminate());
    }

    #[test]
    fn test_cabac_decode_decision_updates_state() {
        let data = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
        let mut dec = CabacDecoder::new(&data);
        let mut ctx = CabacContext::equiprobable();
        let initial_state = ctx.state;

        // After a decision the state should change.
        let _bin = dec.decode_decision(&mut ctx);
        // The state may or may not differ from initial (depends on MPS/LPS),
        // but the function should not panic.
        assert!(ctx.state <= 63);
        let _ = initial_state;
    }

    #[test]
    fn test_decode_unary_zero() {
        // With all-zero data, decode_decision on an equiprobable context
        // with value=0 should return the MPS (false) immediately,
        // giving unary value 0.
        let data = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
        let mut dec = CabacDecoder::new(&data);
        let mut ctx = CabacContext::equiprobable();
        let val = decode_unary(&mut dec, &mut ctx, 10);
        // Value should be 0 since MPS = false -> decode_decision returns false
        // on MPS path (value < range for all-zero data).
        assert_eq!(val, 0);
    }

    #[test]
    fn test_fixed_length_decode() {
        // All-ones data: bypass bits should all be 1.
        let data = [0xFF, 0xFF, 0xFF, 0xFF];
        let mut dec = CabacDecoder::new(&data);
        let val = decode_fixed_length(&mut dec, 3);
        // 3 bypass bits from all-1s stream should produce 0b111 = 7.
        assert_eq!(val, 7);
    }

    #[test]
    fn test_entropy_coding_mode_from_flag() {
        assert_eq!(
            EntropyCodingMode::from_flag(false),
            EntropyCodingMode::Cavlc
        );
        assert_eq!(EntropyCodingMode::from_flag(true), EntropyCodingMode::Cabac);
    }

    #[test]
    fn test_init_cabac_contexts_count() {
        let contexts = init_cabac_contexts(26);
        assert_eq!(contexts.len(), NUM_CABAC_CONTEXTS);
    }

    #[test]
    fn test_decode_residual_block_length() {
        // Verify that decode_residual_block_cabac always returns the
        // requested number of coefficients regardless of input data.
        let data = [0x00; 32];
        let mut dec = CabacDecoder::new(&data);
        let mut contexts = init_cabac_contexts(26);
        let coeffs = decode_residual_block_cabac(&mut dec, &mut contexts, 16);
        assert_eq!(coeffs.len(), 16);

        // Also verify with max_num_coeff = 4 (chroma DC).
        let data2 = [0x00; 32];
        let mut dec2 = CabacDecoder::new(&data2);
        let mut contexts2 = init_cabac_contexts(26);
        let coeffs2 = decode_residual_block_cabac(&mut dec2, &mut contexts2, 4);
        assert_eq!(coeffs2.len(), 4);
    }

    #[test]
    fn test_transition_table_bounds() {
        // Verify all transition table entries are in [0, 63].
        for &s in TRANSITION_MPS.iter() {
            assert!(s <= 63);
        }
        for &s in TRANSITION_LPS.iter() {
            assert!(s <= 63);
        }
    }

    #[test]
    fn test_range_table_positive() {
        // All range table entries should be > 0.
        for row in RANGE_TABLE.iter() {
            for &val in row.iter() {
                assert!(val > 0);
            }
        }
    }
}