pixo 0.4.1 - Docs.rs

//! Trellis quantization for JPEG encoding.
//!
//! Trellis quantization uses rate-distortion optimization to find the optimal
//! quantized coefficients by considering the Huffman coding cost of different
//! coefficient choices.
//!
//! This implementation uses the Viterbi algorithm with improved rate estimation
//! based on actual JPEG Huffman coding costs. This can improve compression
//! by 5-15% compared to simple rounding.
//!
//! Key improvements over basic trellis:
//! - Full path backtracking for optimal coefficient selection
//! - Accurate rate estimation using JPEG's run-length encoding model
//! - Adaptive candidate generation based on coefficient magnitude
//! - Zero-run tracking for better EOB prediction

use crate::jpeg::quantize::ZIGZAG;

/// Lambda value for rate-distortion tradeoff.
/// Higher values favor smaller file sizes over quality.
/// This value is tuned to balance quality and compression.
const DEFAULT_LAMBDA: f32 = 1.0;

/// Maximum number of candidate values to consider per coefficient.
const MAX_CANDIDATES: usize = 5;

/// Maximum number of trellis states to track (pruning threshold).
const MAX_STATES: usize = 8;

/// Trellis state for Viterbi algorithm.
#[derive(Clone, Copy)]
struct TrellisState {
    /// Accumulated cost (rate + lambda * distortion)
    cost: f32,
    /// Number of consecutive zeros before this position (for run-length encoding)
    zero_run: u8,
    /// Parent state index in previous column
    parent: u16,
    /// Quantized coefficient value at this state
    value: i16,
}

impl Default for TrellisState {
    fn default() -> Self {
        Self {
            cost: f32::INFINITY,
            zero_run: 0,
            parent: 0,
            value: 0,
        }
    }
}

/// Perform trellis quantization on a DCT block.
///
/// Uses the Viterbi algorithm to find the optimal quantized coefficients
/// by minimizing rate + lambda * distortion. This version uses full path
/// backtracking for optimal results.
///
/// # Arguments
/// * `dct` - DCT coefficients (floating-point)
/// * `quant_table` - Quantization table values
/// * `lambda` - Rate-distortion tradeoff parameter (higher = smaller files)
///
/// # Returns
/// Quantized coefficients optimized for compression
pub fn trellis_quantize(
    dct: &[f32; 64],
    quant_table: &[f32; 64],
    lambda: Option<f32>,
) -> [i16; 64] {
    let lambda = lambda.unwrap_or(DEFAULT_LAMBDA);
    let mut result = [0i16; 64];

    // DC coefficient: use simple rounding (trellis not beneficial for DC)
    result[0] = (dct[0] / quant_table[0]).round() as i16;

    // Collect AC coefficient info in zigzag order
    let mut ac_info: Vec<(usize, f32, f32)> = Vec::with_capacity(63);
    for zz_pos in 1..64 {
        let natural_pos = ZIGZAG[zz_pos];
        ac_info.push((natural_pos, dct[natural_pos], quant_table[natural_pos]));
    }

    // Initialize trellis with single starting state
    let mut current_states: Vec<TrellisState> = vec![TrellisState {
        cost: 0.0,
        zero_run: 0,
        parent: 0,
        value: 0,
    }];

    // Store all states for backtracking
    let mut all_states: Vec<Vec<TrellisState>> = Vec::with_capacity(64);
    all_states.push(current_states.clone());

    // Process each AC coefficient in zigzag order
    for &(_natural_pos, coef, q) in &ac_info {
        // Generate candidate quantized values
        let float_quant = coef / q;
        let candidates = generate_candidates(float_quant);

        // Build next states
        let mut next_states: Vec<TrellisState> = Vec::with_capacity(MAX_STATES * MAX_CANDIDATES);

        for (parent_idx, parent) in current_states.iter().enumerate() {
            for &candidate in &candidates {
                // Calculate distortion
                let reconstructed = candidate as f32 * q;
                let distortion = (coef - reconstructed).powi(2);

                // Calculate rate based on candidate value and zero run
                let (rate, new_zero_run) = if candidate == 0 {
                    // Zero coefficient: may contribute to zero run or trigger ZRL
                    let new_run = parent.zero_run.saturating_add(1);
                    if new_run >= 16 {
                        // Will need ZRL symbol
                        (estimate_zrl_rate(), 0)
                    } else {
                        // Just extend the run
                        (0.0, new_run)
                    }
                } else {
                    // Non-zero: encode run-length + value
                    let rate = estimate_ac_rate(candidate, parent.zero_run);
                    (rate, 0)
                };

                // Total cost
                let cost = parent.cost + rate + lambda * distortion;

                // Check if this improves on existing states
                let state = TrellisState {
                    cost,
                    zero_run: new_zero_run,
                    parent: parent_idx as u16,
                    value: candidate,
                };

                // Try to merge with existing state with same (value, zero_run)
                let existing = next_states
                    .iter_mut()
                    .find(|s| s.value == candidate && s.zero_run == new_zero_run);

                match existing {
                    Some(s) if cost < s.cost => *s = state,
                    None => next_states.push(state),
                    _ => {}
                }
            }
        }

        // Prune to keep only best states
        next_states.sort_by(|a, b| {
            a.cost
                .partial_cmp(&b.cost)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        next_states.truncate(MAX_STATES);

        all_states.push(next_states.clone());
        current_states = next_states;

        // Early termination if no valid states remain
        if current_states.is_empty() {
            break;
        }
    }

    // Add EOB cost to final states
    for state in &mut current_states {
        if state.zero_run > 0 {
            // We have trailing zeros - EOB will be encoded
            state.cost += estimate_eob_rate();
        }
    }

    // Find best final state
    if let Some(best) = current_states.iter().min_by(|a, b| {
        a.cost
            .partial_cmp(&b.cost)
            .unwrap_or(std::cmp::Ordering::Equal)
    }) {
        // Backtrack to reconstruct optimal path
        let mut path_values = [0i16; 63];
        let mut state_idx = current_states
            .iter()
            .position(|s| std::ptr::eq(s, best))
            .unwrap();

        // Walk backwards through all_states
        for zz_pos in (1..64).rev() {
            let states = &all_states[zz_pos];
            if state_idx < states.len() {
                path_values[zz_pos - 1] = states[state_idx].value;
                state_idx = states[state_idx].parent as usize;
            }
        }

        // Write results in natural order
        for (zz_pos, &val) in path_values.iter().enumerate() {
            let natural_pos = ZIGZAG[zz_pos + 1];
            result[natural_pos] = val;
        }
    }

    result
}

fn generate_candidates(float_quant: f32) -> Vec<i16> {
    let rounded = float_quant.round() as i16;
    let floor_val = float_quant.floor() as i16;
    let ceil_val = float_quant.ceil() as i16;

    let mut candidates = Vec::with_capacity(MAX_CANDIDATES);

    // Always include 0 as a candidate (important for sparsity)
    candidates.push(0);

    // Add floor, round, ceil
    if floor_val != 0 && !candidates.contains(&floor_val) {
        candidates.push(floor_val);
    }
    if rounded != 0 && !candidates.contains(&rounded) {
        candidates.push(rounded);
    }
    if ceil_val != 0 && !candidates.contains(&ceil_val) {
        candidates.push(ceil_val);
    }

    // For larger magnitudes, add one more candidate further from zero
    if float_quant.abs() > 1.5 {
        let extended = if float_quant >= 0.0 {
            ceil_val + 1
        } else {
            floor_val - 1
        };
        if !candidates.contains(&extended) {
            candidates.push(extended);
        }
    }

    candidates
}

fn estimate_ac_rate(value: i16, zero_run: u8) -> f32 {
    let cat = category(value);

    // Huffman code length estimate for (run, size) symbol
    // Typical AC Huffman codes: low run/size = 2-6 bits, high = 12-16 bits
    let rs = ((zero_run as usize) << 4) | (cat as usize);
    let huffman_bits = estimate_ac_huffman_length(rs);

    // Value bits = category
    let value_bits = cat as f32;

    huffman_bits + value_bits
}

fn estimate_ac_huffman_length(rs: usize) -> f32 {
    // Common symbols have shorter codes
    match rs {
        0x00 => 4.0,  // EOB - very common
        0x01 => 2.0,  // (0,1) - most common non-EOB
        0x02 => 2.5,  // (0,2)
        0x03 => 3.0,  // (0,3)
        0x04 => 4.0,  // (0,4)
        0x11 => 3.0,  // (1,1)
        0x12 => 4.0,  // (1,2)
        0x21 => 4.0,  // (2,1)
        0xF0 => 10.0, // ZRL - rare
        _ => {
            // Estimate based on run and size
            let run = (rs >> 4) as f32;
            let size = (rs & 0x0F) as f32;
            3.0 + run * 0.5 + size * 0.3
        }
    }
}

fn estimate_zrl_rate() -> f32 {
    10.0 // ZRL is typically 10-12 bits
}

fn estimate_eob_rate() -> f32 {
    4.0 // EOB is typically 2-4 bits
}

fn category(value: i16) -> u8 {
    let abs_val = value.unsigned_abs();
    if abs_val == 0 {
        0
    } else {
        16 - abs_val.leading_zeros() as u8
    }
}

/// Trellis quantization with custom lambda for different quality targets.
///
/// # Arguments
/// * `dct` - DCT coefficients
/// * `quant_table` - Quantization table
/// * `quality` - Quality setting 1-100 (used to adjust lambda)
pub fn trellis_quantize_adaptive(
    dct: &[f32; 64],
    quant_table: &[f32; 64],
    quality: u8,
) -> [i16; 64] {
    // Adjust lambda based on quality:
    // - Higher quality (80-100): lower lambda, less aggressive quantization
    // - Lower quality (1-50): higher lambda, more aggressive
    let lambda = if quality >= 80 {
        0.5 + (100 - quality) as f32 * 0.025 // 0.5 to 1.0
    } else if quality >= 50 {
        1.0 + (80 - quality) as f32 * 0.033 // 1.0 to 2.0
    } else {
        2.0 + (50 - quality) as f32 * 0.04 // 2.0 to 4.0
    };

    trellis_quantize(dct, quant_table, Some(lambda))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_trellis_quantize_zeros() {
        let dct = [0.0f32; 64];
        let quant = [16.0f32; 64];

        let result = trellis_quantize(&dct, &quant, None);

        // All zeros in, all zeros out
        for &v in &result {
            assert_eq!(v, 0);
        }
    }

    #[test]
    fn test_trellis_quantize_dc() {
        let mut dct = [0.0f32; 64];
        dct[0] = 800.0; // Large DC component

        let mut quant = [16.0f32; 64];
        quant[0] = 16.0;

        let result = trellis_quantize(&dct, &quant, None);

        // DC should be quantized normally
        assert_eq!(result[0], 50); // 800 / 16 = 50
    }

    #[test]
    fn test_generate_candidates() {
        let candidates = generate_candidates(5.3);
        assert!(candidates.contains(&0)); // Always include 0
        assert!(candidates.contains(&5)); // floor
        assert!(candidates.contains(&6)); // ceil

        let candidates = generate_candidates(-5.3);
        assert!(candidates.contains(&0));
        assert!(candidates.contains(&-5));
        assert!(candidates.contains(&-6));
    }

    #[test]
    fn test_generate_candidates_zero() {
        let candidates = generate_candidates(0.1);
        assert!(candidates.contains(&0));
        assert!(!candidates.is_empty());
    }

    #[test]
    fn test_category() {
        assert_eq!(category(0), 0);
        assert_eq!(category(1), 1);
        assert_eq!(category(-1), 1);
        assert_eq!(category(2), 2);
        assert_eq!(category(3), 2);
        assert_eq!(category(127), 7);
        assert_eq!(category(-128), 8);
    }

    #[test]
    fn test_trellis_sparsity() {
        // Test that trellis tends to produce sparser output for small coefficients
        let mut dct = [0.0f32; 64];
        // Small coefficients that might round to 1 but could be zero
        for i in 1..64 {
            dct[i] = 8.0; // Just above threshold
        }

        let quant = [16.0f32; 64];

        let result = trellis_quantize(&dct, &quant, Some(2.0));

        // With high lambda, many coefficients should be quantized to 0
        let zero_count = result.iter().skip(1).filter(|&&x| x == 0).count();
        assert!(
            zero_count > 30,
            "Expected many zeros with high lambda, got {zero_count}"
        );
    }

    #[test]
    fn test_adaptive_lambda() {
        let mut dct = [0.0f32; 64];
        dct[0] = 800.0;
        dct[1] = 50.0;

        let quant = [16.0f32; 64];

        // High quality should preserve more detail
        let high_q = trellis_quantize_adaptive(&dct, &quant, 95);

        // Low quality can be more aggressive
        let low_q = trellis_quantize_adaptive(&dct, &quant, 30);

        // DC should be the same
        assert_eq!(high_q[0], low_q[0]);

        // Both should produce valid results
        assert!(high_q[1].abs() <= 5);
        assert!(low_q[1].abs() <= 5);
    }

    #[test]
    fn test_category_edge_cases() {
        // Edge cases for category calculation
        assert_eq!(category(i16::MAX), 15);
        assert_eq!(category(i16::MIN + 1), 15);
        assert_eq!(category(16383), 14);
        assert_eq!(category(-16384), 15);
    }

    #[test]
    fn test_generate_candidates_negative() {
        let candidates = generate_candidates(-3.7);
        assert!(candidates.contains(&0));
        assert!(candidates.contains(&-3) || candidates.contains(&-4));
    }

    #[test]
    fn test_generate_candidates_exact_integer() {
        let candidates = generate_candidates(5.0);
        assert!(candidates.contains(&0));
        assert!(candidates.contains(&5));
    }

    #[test]
    fn test_generate_candidates_small() {
        // Small values should include 0 and surrounding integers
        let candidates = generate_candidates(0.5);
        assert!(candidates.contains(&0));
        assert!(candidates.contains(&1) || candidates.is_empty());
    }

    #[test]
    fn test_trellis_quantize_single_ac() {
        let mut dct = [0.0f32; 64];
        dct[0] = 400.0; // DC
        dct[1] = 50.0; // Single AC coefficient

        let quant = [16.0f32; 64];

        let result = trellis_quantize(&dct, &quant, None);

        // DC should be quantized correctly
        assert_eq!(result[0], 25); // 400 / 16 = 25
    }

    #[test]
    fn test_trellis_quantize_preserves_dc() {
        // Test that DC is always preserved (not subject to trellis optimization)
        let mut dct = [0.0f32; 64];
        dct[0] = 160.0; // 160/16 = 10

        let quant = [16.0f32; 64];

        let result = trellis_quantize(&dct, &quant, Some(10.0)); // High lambda

        // DC should still be 10 regardless of lambda
        assert_eq!(result[0], 10);
    }

    #[test]
    fn test_trellis_quantize_high_frequency() {
        // Test behavior with high-frequency coefficients
        let mut dct = [0.0f32; 64];
        dct[0] = 200.0; // DC
                        // Set high-frequency coefficients (late in zigzag order)
        dct[63] = 32.0;
        dct[62] = 48.0;

        let quant = [16.0f32; 64];

        let result = trellis_quantize(&dct, &quant, None);

        // Should produce valid quantized values
        assert_eq!(result[0], 13); // 200 / 16 = 12.5 -> 13
    }

    #[test]
    fn test_trellis_quantize_near_threshold() {
        // Test coefficients near the quantization threshold
        let mut dct = [0.0f32; 64];
        dct[0] = 160.0;
        // Coefficients that are just above/below threshold
        dct[1] = 8.1; // Just above 0.5 * quant = 8
        dct[2] = 7.9; // Just below 0.5 * quant = 8

        let mut quant = [16.0f32; 64];
        quant[1] = 16.0;
        quant[2] = 16.0;

        let result = trellis_quantize(&dct, &quant, None);

        // Both should potentially quantize to 0 or 1 based on rate-distortion tradeoff
        assert!(result[1].abs() <= 1);
        assert!(result[2].abs() <= 1);
    }

    #[test]
    fn test_estimate_ac_huffman_length_common_symbols() {
        // Test that common symbols get reasonable length estimates
        assert!(estimate_ac_huffman_length(0x00) <= 4.0); // EOB
        assert!(estimate_ac_huffman_length(0x01) <= 3.0); // (0,1)
        assert!(estimate_ac_huffman_length(0xF0) >= 8.0); // ZRL is rare
    }

    #[test]
    fn test_trellis_quantize_negative_coefficients() {
        let mut dct = [0.0f32; 64];
        dct[0] = -100.0;
        dct[1] = -50.0;
        dct[2] = 30.0;

        let quant = [10.0f32; 64];

        let result = trellis_quantize(&dct, &quant, None);

        // DC should handle negative values
        assert_eq!(result[0], -10); // -100 / 10 = -10
    }

    #[test]
    fn test_adaptive_quality_boundaries() {
        let mut dct = [0.0f32; 64];
        dct[0] = 500.0;
        let quant = [16.0f32; 64];

        // Test quality boundaries
        let _q1 = trellis_quantize_adaptive(&dct, &quant, 1);
        let _q50 = trellis_quantize_adaptive(&dct, &quant, 50);
        let _q80 = trellis_quantize_adaptive(&dct, &quant, 80);
        let _q100 = trellis_quantize_adaptive(&dct, &quant, 100);

        // All should produce valid DC
        assert_eq!(_q1[0], 31); // 500 / 16 = 31.25 -> 31
        assert_eq!(_q100[0], 31);
    }

    #[test]
    fn test_trellis_state_default() {
        let state = TrellisState::default();
        assert_eq!(state.cost, f32::INFINITY);
        assert_eq!(state.zero_run, 0);
        assert_eq!(state.parent, 0);
        assert_eq!(state.value, 0);
    }

    #[test]
    fn test_trellis_with_custom_lambda() {
        let mut dct = [0.0f32; 64];
        dct[0] = 800.0;
        for i in 1..64 {
            dct[i] = 10.0; // Small uniform coefficients
        }

        let quant = [16.0f32; 64];

        // Very low lambda should preserve more coefficients
        let result_low = trellis_quantize(&dct, &quant, Some(0.1));

        // Very high lambda should zero more coefficients
        let result_high = trellis_quantize(&dct, &quant, Some(10.0));

        // Count non-zero AC coefficients
        let nonzero_low: usize = result_low.iter().skip(1).filter(|&&x| x != 0).count();
        let nonzero_high: usize = result_high.iter().skip(1).filter(|&&x| x != 0).count();

        // Higher lambda should produce sparser result
        assert!(
            nonzero_high <= nonzero_low,
            "High lambda ({nonzero_high}) should be sparser than low ({nonzero_low})"
        );
    }

    #[test]
    fn test_trellis_zigzag_ordering() {
        // Verify that trellis processes coefficients in zigzag order
        let mut dct = [0.0f32; 64];
        dct[0] = 200.0; // DC (position 0)
        dct[1] = 50.0; // Should map to zigzag position 1
        dct[8] = 40.0; // Should map to zigzag position 2

        let quant = [10.0f32; 64];

        let result = trellis_quantize(&dct, &quant, Some(0.5));

        // DC should be at position 0
        assert_eq!(result[0], 20);
        // Just verify the function completed and returned a valid array
        assert_eq!(result.len(), 64);
    }
}