opus-rs 0.1.16 - Docs.rs

/// SILK bitstream comparison test
/// Compares Rust SILK encoder output against C reference (opus-1.6)
///
/// C reference was generated with:
///   - 8kHz NB, mono, VOIP, complexity 0, 10kbps CBR
///   - Input: 160 samples of 440Hz sine at amplitude 10000
///   - Run verify_silk_detail.c to regenerate reference data
use opus_rs::range_coder::RangeCoder;
use opus_rs::silk::control_codec::*;
use opus_rs::silk::define::*;
use opus_rs::silk::enc_api::{silk_encode_do_vad, silk_encode_frame};
use opus_rs::silk::init_encoder::silk_init_encoder;
use opus_rs::silk::structs::*;

/// Generate the exact input frame that C's encode_frame_FIX receives.
/// This is C's inputBuf[1..161] after the full pipeline:
///   float sine (amplitude 1.0, 32768 scale) → HP filter → silk_resampler (6-sample delay)
///   → inputBuf with sMid[1]=0 at index 0 → LP variable cutoff (mode=0, passthrough)
/// Captured directly from C debug output of opus_encode_float with 440Hz sine at 8kHz.
fn generate_c_reference_input() -> Vec<i16> {
    vec![
        0, 0, 0, 0, 0, 0, 0, 0, 10740, 19497, 25261, 27373, 25605, 20190, 11791, 1424, -9664,
        -20138, -28737, -32768, -32768, -32768, -29233, -20701, -10103, 1330, 12264, 21426, 27748,
        30504, 29383, 24534, 16549, 6388, -4734, -15487, -24585, -30938, -32768, -32767, -28003,
        -20039, -9807, 1494, 12539, 22031, 28857, 32219, 31729, 27451, 19900, 9974, -1145, -12136,
        -21694, -28684, -32273, -32032, -27986, -20608, -10768, 376, 11508, 21315, 28642, 32622,
        32767, 29124, 22063, 12441, 1398, -9759, -19710, -27278, -31568, -32071, -28729, -21936,
        -12495, -1523, 9683, 19797, 27623, 32236, 32767, 30081, 23565, 14313, 3417, -7835, -18114,
        -26206, -31154, -32377, -29728, -23523, -14497, -3718, 7538, 17940, 26255, 31499, 32767,
        30729, 24803, 15972, 5282, -6007, -16560, -25129, -30705, -32628, -30672, -25069, -16483,
        -5931, 5341, 15997, 24776, 30640, 32767, 31273, 25963, 17596, 7159, -4117, -14897, -23908,
        -30084, -32698, -31439, -26456, -18342, -8055, 3188, 14058, 23268, 29730, 32678, 31763,
        27095, 19223, 9078, -2139, -13103, -22519, -29274, -32567, -32011, -27672, -20062, -10082,
        1088, 12128, 21731, 28763, 32392, 32187, 28176, 20829,
    ]
}

/// Create a NB SILK encoder matching the C reference configuration:
/// 8kHz, 20ms, complexity 0, CBR, target_rate matched to C (snr_db_q7=2205)
fn create_nb_encoder() -> SilkEncoderState {
    let mut enc = SilkEncoderState::default();
    silk_init_encoder(&mut enc, 0);
    silk_control_encoder(&mut enc, 8, 20, 10000, 0);
    enc.s_cmn.use_cbr = 1;
    // Match C reference: snr_db_q7=2205, target_rate_bps=9600
    // (C sets these via silk_control_SNR and silk_Encode's bitrate computation)
    enc.s_cmn.snr_db_q7 = 2205;
    enc.s_cmn.target_rate_bps = 9600;
    enc
}

#[test]
fn test_silk_encode_nb_frame1_structure() {
    // Test that SILK NB encoder produces valid output with correct structure
    let mut enc = create_nb_encoder();
    let input = generate_c_reference_input();

    // Verify encoder configuration matches C reference
    assert_eq!(enc.s_cmn.fs_khz, 8, "Should be 8kHz NB");
    assert_eq!(enc.s_cmn.frame_length, 160, "20ms at 8kHz = 160 samples");
    assert_eq!(enc.s_cmn.nb_subfr, 4, "4 subframes for 20ms");
    assert_eq!(enc.s_cmn.subfr_length, 40, "5ms subframes at 8kHz");
    assert_eq!(enc.s_cmn.predict_lpc_order, 10, "LPC order 10 for NB");
    assert_eq!(enc.s_cmn.ltp_mem_length, 160, "LTP memory = 20ms");
    assert_eq!(enc.s_cmn.la_pitch, 16, "LA pitch = 2ms at 8kHz");
    assert_eq!(enc.s_cmn.la_shape, 24, "LA shape = 3ms*8 for complexity 0");
    assert_eq!(
        enc.s_cmn.n_states_delayed_decision, 1,
        "1 state for complexity 0 (plain NSQ)"
    );
    assert!(
        enc.ps_nlsf_cb.is_some(),
        "NLSF codebook should be set for NB"
    );

    let mut rc = RangeCoder::new_encoder(1275);
    let mut n_bytes_out: i32 = 0;

    /* Write LBRR/VAD preamble exactly as C's silk_Encode does:
     * nFramesPerPacket=1, nChannelsInternal=1
     * iCDF[0] = 256 - (256 >> (nFramesPerPacket+1)*nChannelsInternal) = 256 - 64 = 192
     * Writes value 0 (no LBRR), which represents VAD+FEC flags placeholder
     */
    let icdf_preamble = [192u8, 0u8];
    rc.encode_icdf(0, &icdf_preamble, 8);
    let bits_after_preamble = rc.tell() as i32;
    eprintln!("DBG preamble: bits_after_preamble={}", bits_after_preamble);

    /* Run VAD before encoding (like the C encoder does) */
    silk_encode_do_vad(&mut enc, &input, 1);

    let ret = silk_encode_frame(
        &mut enc,
        &input,
        &mut rc,
        &mut n_bytes_out,
        CODE_INDEPENDENTLY,
        192, // max_bits = 192 = (25-1)*8 for 9600 bps at 20ms (matching C's maxBits)
        1,   // CBR
    );

    assert_eq!(ret, 0, "Encode should succeed");
    assert!(n_bytes_out > 0, "Should produce output bytes");

    // Dump the encoded data for manual comparison
    rc.done();
    let mut payload = vec![0u8; n_bytes_out as usize];
    // Copy front part
    let front = rc.offs as usize;
    payload[..front].copy_from_slice(&rc.buf[..front]);
    // Copy end part
    let end_len = rc.end_offs as usize;
    if end_len > 0 {
        let src_start = (rc.storage - rc.end_offs) as usize;
        payload[n_bytes_out as usize - end_len..]
            .copy_from_slice(&rc.buf[src_start..src_start + end_len]);
    }

    println!("Frame 1: {} bytes", n_bytes_out);
    print!("RUST_FRAME1:");
    for b in &payload {
        print!("{:02x}", b);
    }
    println!();

    // Basic sanity checks on the bitstream
    assert!(
        n_bytes_out >= 5 && n_bytes_out <= 100,
        "Output size {} should be reasonable for NB SILK",
        n_bytes_out
    );

    // Check signal type was determined
    let sig_type = enc.s_cmn.indices.signal_type;
    println!(
        "Signal type: {} (0=inactive, 1=unvoiced, 2=voiced)",
        sig_type
    );
    // For a 440Hz sine, the encoder should detect it as voiced
    assert!(
        sig_type == TYPE_VOICED as i8 || sig_type == TYPE_UNVOICED as i8,
        "Signal type {} should be voiced or unvoiced for sine input",
        sig_type
    );
}

#[test]
fn test_silk_encode_nb_two_frames_consistency() {
    let mut enc = create_nb_encoder();
    let input = generate_c_reference_input();

    // Encode frame 1
    let mut rc1 = RangeCoder::new_encoder(1275);
    let mut n1: i32 = 0;
    silk_encode_do_vad(&mut enc, &input, 1);
    silk_encode_frame(
        &mut enc,
        &input,
        &mut rc1,
        &mut n1,
        CODE_INDEPENDENTLY,
        8000,
        1,
    );
    enc.s_cmn.n_frames_encoded += 1;

    let sig1 = enc.s_cmn.indices.signal_type;
    let prev_lag = enc.s_cmn.prev_lag;

    println!(
        "Frame 1: {} bytes, signal_type={}, prev_lag={}",
        n1, sig1, prev_lag
    );

    // Encode frame 2 with same input
    let mut rc2 = RangeCoder::new_encoder(1275);
    let mut n2: i32 = 0;
    silk_encode_do_vad(&mut enc, &input, 1);
    silk_encode_frame(
        &mut enc,
        &input,
        &mut rc2,
        &mut n2,
        CODE_INDEPENDENTLY, // In C, first frame of packet is always INDEPENDENTLY
        8000,
        1,
    );
    enc.s_cmn.n_frames_encoded += 1;

    let sig2 = enc.s_cmn.indices.signal_type;

    println!("Frame 2: {} bytes, signal_type={}", n2, sig2);

    // Both frames should produce valid output
    assert!(n1 > 0 && n2 > 0, "Both frames should produce output");

    // Signal type should be stable for same input
    // (after first frame warmup, subsequent frames should be consistent)
    println!(
        "Frame 1 signal_type: {}, Frame 2 signal_type: {}",
        sig1, sig2
    );
}

#[test]
fn test_silk_encode_nb_silent_frame() {
    let mut enc = create_nb_encoder();

    // Warm up with one sine frame
    let sine = generate_c_reference_input();
    let mut rc = RangeCoder::new_encoder(1275);
    let mut n: i32 = 0;
    silk_encode_do_vad(&mut enc, &sine, 1);
    silk_encode_frame(
        &mut enc,
        &sine,
        &mut rc,
        &mut n,
        CODE_INDEPENDENTLY,
        8000,
        1,
    );
    enc.s_cmn.n_frames_encoded += 1;

    // Now encode silent frame
    let silence = vec![0i16; 160];
    let mut rc2 = RangeCoder::new_encoder(1275);
    let mut n2: i32 = 0;
    silk_encode_do_vad(&mut enc, &silence, 1);
    silk_encode_frame(
        &mut enc,
        &silence,
        &mut rc2,
        &mut n2,
        CODE_INDEPENDENTLY,
        8000,
        1,
    );

    println!("Silent frame: {} bytes", n2);
    assert!(n2 > 0, "Silent frame should still produce output");

    // Silent input should produce NO_VOICE_ACTIVITY or UNVOICED
    let sig = enc.s_cmn.indices.signal_type;
    println!("Silent signal_type: {}", sig);
}

#[test]
fn test_opus_encoder_silk_nb() {
    // Test the full OpusEncoder in SILK mode
    let mut enc =
        opus_rs::OpusEncoder::new(8000, 1, opus_rs::Application::Voip).expect("Create encoder");
    enc.bitrate_bps = 10000;
    enc.complexity = 0;
    enc.use_cbr = true;

    // Generate float input
    let mut pcm = vec![0.0f32; 160];
    for i in 0..160 {
        pcm[i] =
            (10000.0 / 32768.0) * (2.0 * std::f32::consts::PI * 440.0 * i as f32 / 8000.0).sin();
    }

    let mut output = vec![0u8; 256];
    let result = enc.encode(&pcm, 160, &mut output);

    match result {
        Ok(len) => {
            println!("OpusEncoder SILK NB: {} bytes", len);
            print!("OPUS_OUTPUT:");
            for i in 0..len {
                print!("{:02x}", output[i]);
            }
            println!();

            // Check TOC byte
            let toc = output[0];
            let config = (toc >> 3) & 0x1f;
            let s = (toc >> 2) & 1;
            let c = toc & 3;
            println!("TOC: {:02x} (config={}, s={}, c={})", toc, config, s, c);

            // For SILK NB 20ms mono, config should be 1
            // But our gen_toc uses (bandwidth - NB) << 5 | (period-2) << 3
            // With NB: bw=0, 8kHz/160samples = 50fps → period computation
            // frame_rate=50, rate=50→100→200→400, period=3
            // toc = 0 | (3-2)<<3 | c = 8 | c
            assert!(config <= 3, "Config {} should be valid SILK NB", config);
            assert_eq!(s, 0, "Should be mono");
        }
        Err(e) => {
            panic!("OpusEncoder encode failed: {}", e);
        }
    }
}

/// Test WB (16kHz) SILK encoding matches expected structure
#[test]
fn test_silk_encode_wb_frame() {
    let mut enc = SilkEncoderState::default();
    silk_init_encoder(&mut enc, 0);
    silk_control_encoder(&mut enc, 16, 20, 20000, 0);
    enc.s_cmn.use_cbr = 1;
    enc.s_cmn.snr_db_q7 = 25 * 128;

    // Verify WB configuration
    assert_eq!(enc.s_cmn.fs_khz, 16);
    assert_eq!(enc.s_cmn.frame_length, 320);
    assert_eq!(enc.s_cmn.predict_lpc_order, 16); // WB uses max LPC order

    // Generate 320 samples of 200Hz sine at 16kHz
    let mut input = vec![0i16; 320];
    for i in 0..320 {
        input[i] =
            (10000.0 * (2.0 * std::f64::consts::PI * 200.0 * i as f64 / 16000.0).sin()) as i16;
    }

    let mut rc = RangeCoder::new_encoder(1275);
    let mut n: i32 = 0;
    silk_encode_do_vad(&mut enc, &input, 1);
    let ret = silk_encode_frame(
        &mut enc,
        &input,
        &mut rc,
        &mut n,
        CODE_INDEPENDENTLY,
        16000,
        1,
    );

    assert_eq!(ret, 0);
    assert!(n > 0, "WB encode should produce output, got {} bytes", n);
    println!(
        "WB Frame: {} bytes, signal_type={}",
        n, enc.s_cmn.indices.signal_type
    );
}