codec2/lib.rs
//! Codec 2 is an open source (LGPL 2.1) low bit rate speech codec.
//!
//! This is a zero-dependency, pure Rust port of the original [http://rowetel.com/codec2.html](http://rowetel.com/codec2.html).
//! Currently the 3200, 2400, 1600, 1400, 1300 and 1200 bit/s modes are implemented, for both
//! encoding and decoding.
//!
//! # Basic Usage
//!
//! Create a Codec2 object, e.g. `Codec2::new(Codec2Mode::MODE_3200)`, then repeatedly obtain raw
//! 8 kHz 16-bit audio samples and call `encode` to encode blocks of `samples_per_frame()` samples
//! into `bits_per_frame()` compressed bits, in order. On the receiving end, create a Codec2
//! object and repeatedly call `decode` to decompress each chunk of received bytes into the next
//! samples.
//!
//! Complete example below. This example uses zerocopy to interpret the `&[i16]` slices as `&[u8]`
//! for I/O.
//!
//! Cargo.toml:
//!
//! ```toml
//! [dependencies]
//! zerocopy = "0.3.0"
//! codec2 = "0"
//! ```
//!
//! main.rs:
//!
//! ```rust
//! use codec2::*;
//! use std::env::args;
//! use std::io::prelude::*;
//! use zerocopy::AsBytes;
//!
//! fn main() -> std::io::Result<()> {
//!     if args().len() != 4 || (args().nth(1).unwrap() != "enc" && args().nth(1).unwrap() != "dec") {
//!         eprintln!("Usage: {} (enc|dec) inputfile outputfilename", args().nth(0).unwrap());
//!         eprintln!("Files should be raw 16-bit signed 8 kHz audio");
//!         return Ok(());
//!     }
//!     let mut fin = std::fs::File::open(args().nth(2).unwrap())?;
//!     let mut fout = std::fs::File::create(args().nth(3).unwrap())?;
//!     let mut c2 = Codec2::new(Codec2Mode::MODE_3200);
//!     let mut samps = vec![0; c2.samples_per_frame()]; // i16 I/O buffer
//!     let mut packed = vec![0; (c2.bits_per_frame() + 7) / 8]; // u8 I/O buffer for encoded bits
//!     if args().nth(1).unwrap() == "enc" {
//!         while fin.read_exact(samps.as_bytes_mut()).is_ok() {
//!             c2.encode(&mut packed, &samps[..]);
//!             fout.write_all(&packed)?;
//!         }
//!     } else {
//!         while fin.read_exact(&mut packed).is_ok() {
//!             c2.decode(&mut samps[..], &packed);
//!             fout.write_all(samps.as_bytes())?;
//!         }
//!     }
//!     Ok(())
//! }
//! ```
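//!
//! Frame sizes per mode, as returned by `samples_per_frame()` and `bits_per_frame()`:
//!
//! | Mode      | Samples/frame | Bits/frame |
//! |-----------|---------------|------------|
//! | MODE_3200 | 160           | 64         |
//! | MODE_2400 | 160           | 48         |
//! | MODE_1600 | 320           | 64         |
//! | MODE_1400 | 320           | 56         |
//! | MODE_1300 | 320           | 52         |
//! | MODE_1200 | 320           | 48         |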

#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(non_upper_case_globals)]
#![warn(trivial_numeric_casts)]

#[cfg(test)]
mod tests {
    #[test]
    fn encode_test() {
        let mut c2 = crate::Codec2::new(crate::Codec2Mode::MODE_3200);
        //160 samples
        let spf = c2.samples_per_frame();
        let mut samps = vec![0; spf * 2];
        let bpf = c2.bits_per_frame();
        println!(
            "c2 is n_samp {:?} m_pitch {:?} Sn.len {:?} bpf {} spf {}",
            c2.internal.n_samp,
            c2.internal.m_pitch,
            c2.internal.Sn.len(),
            bpf,
            spf
        );
        let mut outbuf = vec![0; (bpf + 7) / 8];
        c2.encode(&mut outbuf, &samps);
        assert_eq!(&outbuf[..], &[0xC0, 0, 0x6A, 0x43, 0x9C, 0xE4, 0x21, 8][..]);
        let outbuforig = outbuf.clone();
        println!("encoded {:X?}", outbuf);
        c2.encode(&mut outbuf, &samps);
        assert_eq!(&outbuf[..], &[0x81, 0, 9, 0x43, 0x9C, 0xE4, 0x21, 8][..]);
        println!("encoded {:X?}", outbuf);
        c2.encode(&mut outbuf, &samps);
        assert_eq!(&outbuf[..], &[1, 0, 9, 0x43, 0x9C, 0xE4, 0x21, 8][..]);
        println!("encoded {:X?}", outbuf);
        c2.encode(&mut outbuf, &samps);
        assert_eq!(&outbuf[..], &[1, 0, 9, 0x43, 0x9C, 0xE4, 0x21, 8][..]);
        println!("encoded {:X?}", outbuf);

        c2.decode(&mut samps, &outbuforig);
        println!("decoded {:X?}", samps);
        for samp in &samps {
            assert!((*samp).abs() < 0x100);
        }
    }
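
    // A minimal round-trip sketch of the crate-private Gray-coded bit packing
    // helpers defined below (pack()/unpack()); the field layout used here is
    // arbitrary and purely illustrative.
    #[test]
    fn pack_unpack_roundtrip_test() {
        let mut bits = [0u8; 2];
        let mut wr = 0; // write cursor, in bits
        crate::pack(&mut bits, &mut wr, 0b101, 3);
        crate::pack(&mut bits, &mut wr, 0x5A, 8); // crosses a byte boundary
        crate::pack(&mut bits, &mut wr, 1, 1);
        assert_eq!(wr, 12);
        let mut rd = 0; // read cursor, in bits
        assert_eq!(crate::unpack(&bits, &mut rd, 3), 0b101);
        assert_eq!(crate::unpack(&bits, &mut rd, 8), 0x5A);
        assert_eq!(crate::unpack(&bits, &mut rd, 1), 1);
    }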
}

mod kiss_fft;
mod nlp;
use nlp::FDMDV_OS_TAPS_16K;
mod quantise;
use crate::quantise::*;
mod codebook;
use crate::codebook::*;
mod codebookd;
use crate::codebookd::*;
mod codebookge;
use crate::codebookge::*;
mod codebookjvm;
use crate::codebookjvm::*;

const WO_BITS: i32 = 7;
const WO_E_BITS: u32 = 8;
const LSPD_SCALAR_INDEXES: usize = 10;
const LSP_SCALAR_INDEXES: usize = 10;
use std::f64::consts::PI;

const N_S: f32 = 0.01; // internal proc frame length in secs
const TW_S: f32 = 0.005; // trapezoidal synth window overlap
const MAX_AMP: usize = 160; // maximum number of harmonics
const TWO_PI: f32 = 6.283185307; // mathematical constant
const FFT_ENC: usize = 512; // size of FFT used for encoder
const FFT_DEC: usize = 512; // size of FFT used in decoder
const V_THRESH: f32 = 6.0; // voicing threshold in dB
const LPC_ORD: usize = 10; // LPC order

// Pitch estimation constants
const M_PITCH_S: f32 = 0.0400; // pitch analysis window in s
const P_MIN_S: f32 = 0.0025; // minimum pitch period in s
const P_MAX_S: f32 = 0.0200; // maximum pitch period in s
mod inner {
    use crate::*;
    #[derive(Clone, Debug)]
    pub struct C2const {
        pub Fs: i32,       // sample rate of this instance
        pub n_samp: usize, // number of samples per 10ms frame at Fs
        // pub max_amp: i32, // maximum number of harmonics
        pub m_pitch: usize, // pitch estimation window size in samples
        pub p_min: i32,     // minimum pitch period in samples
        pub p_max: i32,     // maximum pitch period in samples
        pub Wo_min: f32,
        pub Wo_max: f32,
        pub nw: usize, // analysis window size in samples
        pub tw: usize, // trapezoidal synthesis window overlap
    }
    impl C2const {
        pub fn new(Fs: i32, framelength_s: f32) -> Self {
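            // Illustrative values for the 8 kHz, 10 ms configuration used by
            // Codec2::new(): n_samp = 80, m_pitch = 320, p_min = 20,
            // p_max = 160, tw = 40 (all in samples).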
            Self {
                Fs: Fs,
                n_samp: ((Fs as f32) * framelength_s).round() as usize,
                // max_amp: ((Fs as f32) * P_MAX_S / 2.0).floor() as i32,
                p_min: ((Fs as f32) * P_MIN_S).floor() as i32,
                p_max: ((Fs as f32) * P_MAX_S).floor() as i32,
                m_pitch: ((Fs as f32) * M_PITCH_S).floor() as usize,
                Wo_min: TWO_PI / ((Fs as f32) * P_MAX_S).floor(),
                Wo_max: TWO_PI / ((Fs as f32) * P_MIN_S).floor(),
                nw: 279,
                tw: ((Fs as f32) * TW_S) as usize,
            }
        }

        /*---------------------------------------------------------------------------*\

          FUNCTION....: dft_speech
          AUTHOR......: David Rowe, conversion by Matt Weeks
          DATE CREATED: 27/5/94

          Finds the DFT of the current input speech frame.

        \*---------------------------------------------------------------------------*/
        // TODO: we can either go for a faster FFT using fftr and some stack usage
        // or we can reduce stack usage to almost zero on STM32 by switching to fft_inplace
        pub fn dft_speech(
            &self,
            fft_fwd_cfg: &codec2_fft_cfg,
            Sw: &mut [COMP],
            Sn: &[f32],
            w: &[f32],
        ) {
            let m_pitch = self.m_pitch;
            let nw = self.nw;

            for i in 0..FFT_ENC {
                Sw[i].r = 0.0;
                Sw[i].i = 0.0;
            }

            /* Centre analysis window on time axis, we need to arrange input
            to FFT this way to make FFT phases correct */

            // move 2nd half to start of FFT input vector

            for i in 0..nw / 2 {
                Sw[i].r = Sn[i + m_pitch / 2] * w[i + m_pitch / 2];
            }

            // move 1st half to end of FFT input vector

            for i in 0..nw / 2 {
                Sw[FFT_ENC - nw / 2 + i].r =
                    Sn[i + m_pitch / 2 - nw / 2] * w[i + m_pitch / 2 - nw / 2];
            }

            codec2_fft_inplace(fft_fwd_cfg, Sw);
        }
    }
    // describes each codebook
    #[derive(Clone, Debug)]
    pub struct lsp_codebook {
        pub k: usize,           // dimension of vector
        pub log2m: i32,         // number of bits in m
        pub m: usize,           // elements in codebook
        pub cb: &'static [f32], // The elements
    }
    #[derive(Copy, Clone)]
    pub struct kiss_fft_cpx {
        pub r: kiss_fft_scalar,
        pub i: kiss_fft_scalar,
    }
    impl kiss_fft_cpx {
        pub fn new() -> Self {
            Self { r: 0.0, i: 0.0 }
        }
        pub fn kf_cexp(phase: f64) -> Self {
            Self {
                r: phase.cos() as f32,
                i: phase.sin() as f32,
            }
        }
    }
    impl std::fmt::Debug for kiss_fft_cpx {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.write_fmt(format_args!("{},{}", self.r, self.i))
        }
    }
    pub type COMP = kiss_fft_cpx;
    #[derive(Clone, Debug)]
    pub struct kiss_fft_state {
        pub nfft: usize,
        pub inverse: i32,
        pub factors: [usize; 2 * MAXFACTORS],
        pub twiddles: Vec<kiss_fft_cpx>,
    }
    impl kiss_fft_state {
        pub fn new(nfft: usize, inverse_fft: i32) -> Self {
            let mut twiddles = Vec::with_capacity(nfft);
            let mut factors = [0; 2 * MAXFACTORS];
            for i in 0..nfft {
                let mut phase = -2.0 * PI * (i as f64) / (nfft as f64);
                if inverse_fft != 0 {
                    phase *= -1.0;
                }
                twiddles.push(kiss_fft_cpx::kf_cexp(phase));
            }
            let mut n = nfft;
            let mut p = 4;
            let floor_sqrt = (n as f64).sqrt().floor() as usize;
            let mut idx = 0;
            // factor out powers of 4, powers of 2, then any remaining primes
            loop {
                while (n % p) != 0 {
                    match p {
                        4 => p = 2,
                        2 => p = 3,
                        _ => p += 2,
                    }
                    if p > floor_sqrt {
                        p = n; // no more factors, skip to end
                    }
                }
                n /= p;
                factors[idx] = p;
                idx += 1;
                factors[idx] = n;
                idx += 1;
                if n <= 1 {
                    break;
                } //do..while (n > 1)
            }
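            // Illustrative: nfft = 512 factors as 4*4*4*4*2, so `factors` holds
            // the (radix, remaining length) pairs [4,128, 4,32, 4,8, 4,2, 2,1].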
            Self {
                nfft: nfft,
                inverse: inverse_fft,
                factors: factors,
                twiddles: twiddles,
            }
        }
    }
    #[derive(Clone, Debug)]
    pub struct kiss_fftr_state {
        pub substate: kiss_fft_cfg,
        pub tmpbuf: Vec<kiss_fft_cpx>,
        pub super_twiddles: Vec<kiss_fft_cpx>,
    }
    impl kiss_fftr_state {
        pub fn new(mut nfft: usize, inverse_fft: i32) -> Self {
            nfft = nfft / 2;
            let mut res = Self {
                substate: kiss_fft_state::new(nfft, inverse_fft),
                tmpbuf: vec![kiss_fft_cpx::new(); nfft],
                super_twiddles: vec![kiss_fft_cpx::new(); nfft / 2],
            };
            for i in 0..nfft / 2 {
                let mut phase =
                    -3.14159265358979323846264338327 * ((i as f64 + 1.0) / nfft as f64 + 0.5);
                if inverse_fft != 0 {
                    phase *= -1.0;
                }
                res.super_twiddles[i] = kiss_fft_cpx::kf_cexp(phase);
            }
            res
        }
    }
    #[derive(Clone, Debug)]
    pub struct NLP {
        pub Fs: i32, // sample rate in Hz
        pub m: usize,
        pub w: [f32; PMAX_M / DEC],   // DFT window
        pub sq: [f32; PMAX_M],        // squared speech samples
        pub mem_x: f32,
        pub mem_y: f32,               // memory for notch filter
        pub mem_fir: [f32; NLP_NTAP], // decimation FIR filter memory
        pub fft_cfg: codec2_fft_cfg,  // kiss FFT config
        pub Sn16k: Vec<f32>,          // Fs=16kHz input speech vector
        // FILE *f,
    }
    impl NLP {
        pub fn new(c2const: &C2const) -> Self {
            let (m, vsize) = if c2const.Fs == 16000 {
                (
                    c2const.m_pitch / 2,
                    FDMDV_OS_TAPS_16K as usize + c2const.n_samp,
                )
            } else {
                (c2const.m_pitch, 0)
            };
            let mut w = [0.0; PMAX_M / DEC];
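            // Hann (raised cosine) window over the decimated analysis length m/DEC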
            for i in 0..m / DEC {
                w[i] =
                    0.5 - 0.5 * (2.0 * PI as f32 * i as f32 / (m as f32 / DEC as f32 - 1.0)).cos();
            }
            Self {
                Fs: c2const.Fs, // sample rate in Hz
                m: m,
                w: w,                     // DFT window
                sq: [0.0; PMAX_M],        // squared speech samples
                mem_x: 0.0,
                mem_y: 0.0,               // memory for notch filter
                mem_fir: [0.0; NLP_NTAP], // decimation FIR filter memory
                fft_cfg: kiss_fft_state::new(PE_FFT_SIZE, 0), // kiss FFT config
                Sn16k: vec![0.0; vsize],  // Fs=16kHz input speech vector
            }
        }
    }
    #[derive(Clone, Debug, Copy)]
    pub struct MODEL {
        pub Wo: f32,                 // fundamental frequency estimate in radians
        pub L: usize,                // number of harmonics
        pub A: [f32; MAX_AMP + 1],   // amplitude of each harmonic
        pub phi: [f32; MAX_AMP + 1], // phase of each harmonic
        pub voiced: i32,             // non-zero if this frame is voiced
    }
    impl MODEL {
        pub fn new(p_max: f32) -> Self {
            let wo = TWO_PI / p_max;
            Self {
                Wo: wo,                               // fundamental frequency estimate in radians
                L: (PI / wo as f64).floor() as usize, // number of harmonics
                A: [0.0; MAX_AMP + 1],                // amplitude of each harmonic
                phi: [0.0; MAX_AMP + 1],              // phase of each harmonic
                voiced: 0,                            // non-zero if this frame is voiced
            }
        }
    }
}
use inner::*;
type kiss_fft_scalar = f32;
type kiss_fft_cfg = kiss_fft_state;

/* e.g. an fft of length 128 has 4 factors
as far as kissfft is concerned:
4*4*4*2
*/
const PE_FFT_SIZE: usize = 512;
const MAXFACTORS: usize = 32;

type codec2_fft_cfg = kiss_fft_state;
type codec2_fftr_cfg = kiss_fftr_state;
type kiss_fftr_cfg = kiss_fftr_state;
fn codec2_fft(cfg: &kiss_fft_cfg, fin: &[kiss_fft_cpx], fout: &mut [kiss_fft_cpx]) {
    kiss_fft::kiss_fft(cfg, fin, fout);
}
fn codec2_fftr(cfg: &mut codec2_fftr_cfg, inp: &[f32], out: &mut [kiss_fft_cpx]) {
    kiss_fft::kiss_fftr(cfg, inp, out);
}
fn codec2_fftri(cfg: &mut codec2_fftr_cfg, inp: &[kiss_fft_cpx], out: &mut [f32]) {
    kiss_fft::kiss_fftri(cfg, inp, out);
}

fn codec2_fft_inplace(cfg: &codec2_fft_cfg, inout: &mut [kiss_fft_cpx]) {
    let mut in_ = [kiss_fft_cpx::new(); 512];
    // decide whether to use the local stack based buffer for in
    // or to allow kiss_fft to allocate RAM
    // second part is just to play safe since first method
    // is much faster and uses less RAM
    if cfg.nfft <= 512 {
        in_[..cfg.nfft].copy_from_slice(&inout[..cfg.nfft]);
        kiss_fft::kiss_fft(cfg, &in_, inout);
    } else {
        kiss_fft::kiss_fft(cfg, &inout.to_vec(), inout);
    }
}

const PMAX_M: usize = 320;
const DEC: usize = 5;
const NLP_NTAP: usize = 48;
const BPF_N: usize = 101;
const LPCPF_BETA: f32 = 0.2;
const LPCPF_GAMMA: f32 = 0.5;

#[derive(Clone, Debug)]
struct Codec2Internal {
    mode: Codec2Mode,
    c2const: C2const,
    Fs: i32,
    n_samp: usize,
    m_pitch: usize,
    fft_fwd_cfg: codec2_fft_cfg,   // forward FFT config
    fftr_fwd_cfg: codec2_fftr_cfg, // forward real FFT config
    w: Vec<f32>,                   // [m_pitch] time domain hamming window
    W: [f32; FFT_ENC],             // DFT of w[]
    Pn: Vec<f32>,                  // [2*n_samp] trapezoidal synthesis window
    bpf_buf: Vec<f32>,             // buffer for band pass filter
    Sn: Vec<f32>,                  // [m_pitch] input speech
    hpf_states: [f32; 2],          // high pass filter states
    nlp: NLP,                      // pitch predictor states
    gray: i32,                     // non-zero for gray encoding

    fftr_inv_cfg: codec2_fftr_cfg, // inverse FFT config
    Sn_: Vec<f32>,                 // [2*n_samp] synthesised output speech
    ex_phase: f32,                 // excitation model phase track
    bg_est: f32,                   // background noise estimate for post filter
    prev_f0_enc: f32,              // previous frame's f0 estimate
    prev_model_dec: MODEL,         // previous frame's model parameters
    prev_lsps_dec: [f32; LPC_ORD], // previous frame's LSPs
    prev_e_dec: f32,               // previous frame's LPC energy

    lpc_pf: i32,     // LPC post filter on
    bass_boost: i32, // LPC post filter bass boost
    beta: f32,       // LPC post filter parameters
    gamma: f32,

    xq_enc: [f32; 2], // joint pitch and energy VQ states
    xq_dec: [f32; 2],

    smoothing: i32, // enable smoothing for channels with errors
    se: f32,        // running sum of squared error
    nse: u32,       // number of terms in sum
    post_filter_en: i32,
}

/*---------------------------------------------------------------------------*\

  FUNCTION....: make_synthesis_window
  AUTHOR......: David Rowe, conversion by Matt Weeks
  DATE CREATED: 11/5/94

  Init function that generates the trapezoidal (Parzen) synthesis window.

\*---------------------------------------------------------------------------*/
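// An illustrative summary of the loops below: the window is zero, ramps
// linearly up to 1.0 over [n_samp/2 - tw, n_samp/2 + tw], stays at 1.0, then
// ramps back down to zero over [3*n_samp/2 - tw, 3*n_samp/2 + tw].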
fn make_synthesis_window(c2const: &C2const, Pn: &mut [f32]) {
    let n_samp = c2const.n_samp;
    let tw = c2const.tw;

    // Generate Parzen window in time domain
    for i in 0..n_samp / 2 - tw {
        Pn[i] = 0.0;
    }
    let mut win = 0.0;
    for i in n_samp / 2 - tw..n_samp / 2 + tw {
        Pn[i] = win;
        win += 1.0 / (2.0 * tw as f32);
    }
    for i in n_samp / 2 + tw..3 * n_samp / 2 - tw {
        Pn[i] = 1.0;
    }
    win = 1.0;
    for i in 3 * n_samp / 2 - tw..3 * n_samp / 2 + tw {
        Pn[i] = win;
        win -= 1.0 / (2.0 * tw as f32);
    }
    for i in 3 * n_samp / 2 + tw..2 * n_samp {
        Pn[i] = 0.0;
    }
}

fn make_analysis_window(
    c2const: &C2const,
    fft_fwd_cfg: &codec2_fft_cfg,
    w: &mut [f32],
    W: &mut [f32],
) {
    let mut wshift = [kiss_fft_cpx::new(); FFT_ENC];
    let m_pitch = c2const.m_pitch;
    let nw = c2const.nw;

    /*
       Generate Hamming window centered on M-sample pitch analysis window

       0            M/2           M-1
       |-------------|-------------|
             |-------|-------|
                 nw samples

       All our analysis/synthesis is centred on the M/2 sample.
    */

    let mut m = 0.0;
    for i in 0..m_pitch / 2 - nw / 2 {
        w[i] = 0.0;
    }
    let mut j = 0;
    for i in m_pitch / 2 - nw / 2..m_pitch / 2 + nw / 2 {
        w[i] = 0.5 - 0.5 * (TWO_PI * (j as f32) / ((nw as f32) - 1.0)).cos();
        m += w[i] * w[i];
        j += 1;
    }
    for i in m_pitch / 2 + nw / 2..m_pitch {
        w[i] = 0.0;
    }

    /* Normalise - makes freq domain amplitude estimation straightforward */

    m = 1.0 / (m * FFT_ENC as f32).sqrt();
    for i in 0..m_pitch {
        w[i] *= m;
    }

    /*
       Generate DFT of analysis window, used for later processing. Note
       we modulo FFT_ENC shift the time domain window w[], this makes the
       imaginary part of the DFT W[] equal to zero as the shifted w[] is
       even about the n=0 time axis if nw is odd. Having the imag part
       of the DFT W[] equal to zero makes computation easier.

       0                      FFT_ENC-1
       |-------------------------|

        ----\               /----
             \             /
              \           /          <- shifted version of window w[n]
               \         /
                \       /
                 -------

       |---------|       |---------|
         nw/2              nw/2
    */

    let mut temp = [kiss_fft_cpx::new(); FFT_ENC];

    for i in 0..FFT_ENC {
        wshift[i].r = 0.0;
        wshift[i].i = 0.0;
    }
    for i in 0..(nw / 2) {
        wshift[i].r = w[i + (m_pitch) / 2];
    }
    let mut j = m_pitch / 2 - nw / 2;
    for i in FFT_ENC - nw / 2..FFT_ENC {
        wshift[i].r = w[j];
        j += 1;
    }

    codec2_fft(fft_fwd_cfg, &wshift[..], &mut temp);

    /*
       Re-arrange W[] to be symmetrical about FFT_ENC/2. Makes later
       analysis convenient.

       Before:

       0                 FFT_ENC-1
       |----------|---------|
       __                   _
         \                 /
          \_______________/

       After:

       0                 FFT_ENC-1
       |----------|---------|
                 ___
                /   \
       ________/     \_______

    */

    for i in 0..FFT_ENC / 2 {
        W[i] = temp[i + FFT_ENC / 2].r;
        W[i + FFT_ENC / 2] = temp[i].r;
    }
}

const WordSize: usize = 8;
const IndexMask: usize = 0x7;
const ShiftRight: usize = 3;
fn pack(
    bitArray: &mut [u8],  // The output bit string.
    bitIndex: &mut usize, // Index into the string in BITS, not bytes.
    field: i32,           // The bit field to be packed.
    fieldWidth: u32,      // Width of the field in BITS, not bytes.
) {
    pack_natural_or_gray(bitArray, bitIndex, field, fieldWidth, 1);
}

/** Pack a bit field into a bit string, encoding the field in Gray code.
 *
 * The output is an array of unsigned char data. The fields are efficiently
 * packed into the bit string. The Gray coding is a naive attempt to reduce
 * the effect of single-bit errors, we expect to do a better job as the
 * codec develops.
 *
 * This code would be simpler if it just set one bit at a time in the string,
 * but would hit the same cache line more often. I'm not sure the complexity
 * gains us anything here.
 *
 * Although field is currently of int type rather than unsigned for
 * compatibility with the rest of the code, indices are always expected to
 * be >= 0.
 */
fn pack_natural_or_gray(
    bitArray: &mut [u8],  // The output bit string.
    bitIndex: &mut usize, // Index into the string in BITS, not bytes.
    mut field: i32,       // The bit field to be packed.
    mut fieldWidth: u32,  // Width of the field in BITS, not bytes.
    gray: u32,            // non-zero for gray coding
) {
    if gray > 0 {
        // Convert the field to Gray code
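        // (illustrative: binary 0b101 maps to Gray 0b111)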
        field = (field >> 1) ^ field;
    }

    while fieldWidth != 0 {
        let bI = *bitIndex;
        let bitsLeft = (WordSize - (bI & IndexMask)) as u32;
        let sliceWidth = if bitsLeft < fieldWidth {
            bitsLeft
        } else {
            fieldWidth
        };
        let wordIndex = bI >> ShiftRight;

        bitArray[wordIndex] |=
            (((field >> (fieldWidth - sliceWidth)) << (bitsLeft - sliceWidth)) & 0xFF) as u8;

        *bitIndex = bI + sliceWidth as usize;
        fieldWidth -= sliceWidth;
    }
}

/** Unpack a field from a bit string, converting from Gray code to binary.
 *
 */
fn unpack(
    bitArray: &[u8],    // The input bit string.
    bitIndex: &mut u32, // Index into the string in BITS, not bytes.
    fieldWidth: u32,    // Width of the field in BITS, not bytes.
) -> i32 {
    unpack_natural_or_gray(bitArray, bitIndex, fieldWidth, 1)
}

/** Unpack a field from a bit string, to binary, optionally using
 * natural or Gray code.
 *
 */
fn unpack_natural_or_gray(
    bitArray: &[u8],     // The input bit string.
    bitIndex: &mut u32,  // Index into the string in BITS, not bytes.
    mut fieldWidth: u32, // Width of the field in BITS, not bytes.
    gray: u32,           // non-zero for Gray coding
) -> i32 {
    let mut field = 0;
    while fieldWidth != 0 {
        let bI = *bitIndex;
        let bitsLeft = WordSize as u32 - (bI & IndexMask as u32);
        let sliceWidth = if bitsLeft < fieldWidth {
            bitsLeft
        } else {
            fieldWidth
        };

        field |= ((bitArray[(bI >> ShiftRight) as usize] as usize >> (bitsLeft - sliceWidth))
            & ((1 << sliceWidth) - 1))
            << (fieldWidth - sliceWidth);

        *bitIndex = bI + sliceWidth;
        fieldWidth -= sliceWidth;
    }

    if gray != 0 {
        // Convert from Gray code to binary. Works for maximum 8-bit fields.
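        // (illustrative: Gray 0b111 decodes back to binary 0b101, inverting
        // the (field >> 1) ^ field encoding in pack_natural_or_gray())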
        let mut t = field ^ (field >> 8);
        t ^= t >> 4;
        t ^= t >> 2;
        t ^= t >> 1;
        t as i32
    } else {
        field as i32
    }
}

/// Codec mode (bitrate). Currently not all bitrates are implemented
#[derive(Clone, Debug, Copy)]
#[non_exhaustive]
pub enum Codec2Mode {
    MODE_3200,
    MODE_2400,
    MODE_1600,
    MODE_1400,
    MODE_1300,
    MODE_1200,
    //MODE_700C,
    //MODE_450,
    //MODE_450PWB,
}
use Codec2Mode::*;

/// A Codec2 object for encoding or decoding audio
#[derive(Clone, Debug)]
pub struct Codec2 {
    internal: Codec2Internal,
}

impl Codec2 {
    /// Creates a new Codec2 object suitable for encoding or decoding audio
    pub fn new(mode: Codec2Mode) -> Self {
        let c2const = C2const::new(8000, N_S);
        let n_samp = c2const.n_samp;
        let m_pitch = c2const.m_pitch;
        let mut c2 = Self {
            internal: Codec2Internal {
                mode: mode,
                Fs: c2const.Fs,
                n_samp: n_samp,
                m_pitch: m_pitch,
                fft_fwd_cfg: kiss_fft_state::new(FFT_ENC, 0), // forward FFT config
                fftr_fwd_cfg: kiss_fftr_state::new(FFT_ENC, 0), // forward real FFT config
                w: vec![0.0; m_pitch],     // [m_pitch] time domain hamming window
                W: [0.0; FFT_ENC],         // DFT of w[]
                Pn: vec![0.0; 2 * n_samp], // [2*n_samp] trapezoidal synthesis window
                bpf_buf: vec![0.0; BPF_N + 4 * n_samp], // buffer for band pass filter
                Sn: vec![1.0; m_pitch],    // [m_pitch] input speech
                hpf_states: [0.0; 2],      // high pass filter states

                nlp: NLP::new(&c2const), // pitch predictor states
                gray: 1,                 // non-zero for gray encoding

                fftr_inv_cfg: kiss_fftr_state::new(FFT_DEC, 1), // inverse FFT config
                Sn_: vec![0.0; m_pitch], // [2*n_samp] synthesised output speech
                prev_f0_enc: 1.0 / P_MAX_S,
                bg_est: 0.0,
                ex_phase: 0.0,
                prev_model_dec: MODEL::new(c2const.p_max as f32),
                c2const: c2const,
                prev_lsps_dec: [0.0; LPC_ORD],
                prev_e_dec: 1.0,
                lpc_pf: 1,          // LPC post filter on
                bass_boost: 1,      // LPC post filter bass boost
                beta: LPCPF_BETA,   // LPC post filter parameters
                gamma: LPCPF_GAMMA,
                xq_enc: [0.0; 2], // joint pitch and energy VQ states
                xq_dec: [0.0; 2],

                smoothing: 0, // enable smoothing for channels with errors
                se: 0.0,
                nse: 0,
                post_filter_en: 1,
            },
        };
        for i in 0..LPC_ORD {
            c2.internal.prev_lsps_dec[i] = (i as f64 * PI / (LPC_ORD as f64 + 1.0)) as f32;
        }
        make_analysis_window(
            &c2.internal.c2const,
            &c2.internal.fft_fwd_cfg,
            &mut c2.internal.w,
            &mut c2.internal.W,
        );
        make_synthesis_window(&c2.internal.c2const, &mut c2.internal.Pn);
        c2
    }

    /// The number of bits in an encoded (compressed) frame; 64 for the 3200 bit/s mode, 48 for
    /// 2400, and so on per the table in the crate docs.
    pub fn bits_per_frame(&self) -> usize {
        match self.internal.mode {
            MODE_3200 => 64,
            MODE_2400 => 48,
            MODE_1600 => 64,
            MODE_1400 => 56,
            MODE_1300 => 52,
            MODE_1200 => 48,
            // MODE_700C => 28,
            // MODE_450 => 18,
            // MODE_450PWB => 18,
        }
    }

    /// How many samples an encoded (compressed) frame represents; 160 (20ms of speech) for the
    /// 3200 and 2400 bit/s modes, 320 (40ms) for the lower bitrates.
    pub fn samples_per_frame(&self) -> usize {
        match self.internal.mode {
            MODE_3200 | MODE_2400 => 160,
            MODE_1600 | MODE_1400 | MODE_1300 | MODE_1200 /* | MODE_700C | MODE_450*/ => 320,
            //MODE_450PWB => 640,
        }
    }

    /// Encodes speech samples at the current bitrate into `bits_per_frame()`/8 rounded-up output
    /// bytes. For MODE_3200, this is 64 bits or 8 bytes; for MODE_2400, it's 48 bits (6 bytes).
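    ///
    /// A minimal sketch, encoding one frame of silence:
    ///
    /// ```
    /// use codec2::{Codec2, Codec2Mode};
    /// let mut c2 = Codec2::new(Codec2Mode::MODE_3200);
    /// let samps = vec![0i16; c2.samples_per_frame()];
    /// let mut packed = vec![0u8; (c2.bits_per_frame() + 7) / 8];
    /// c2.encode(&mut packed, &samps);
    /// assert_eq!(packed.len(), 8); // 64 bits
    /// ```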
    pub fn encode(&mut self, bits: &mut [u8], speech: &[i16]) {
        match self.internal.mode {
            MODE_3200 => self.codec2_encode_3200(bits, speech),
            MODE_2400 => self.codec2_encode_2400(bits, speech),
            MODE_1600 => self.codec2_encode_1600(bits, speech),
            MODE_1400 => self.codec2_encode_1400(bits, speech),
            MODE_1300 => self.codec2_encode_1300(bits, speech),
            MODE_1200 => self.codec2_encode_1200(bits, speech),
        }
    }

    /// Decodes `bits_per_frame()` compressed bits into `samples_per_frame()` speech samples.
    /// For MODE_3200, the input is 64 bits or 8 bytes; for MODE_2400, it's 48 bits (6 bytes).
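    ///
    /// A minimal sketch, decoding one all-zero frame (real input would come off the channel):
    ///
    /// ```
    /// use codec2::{Codec2, Codec2Mode};
    /// let mut c2 = Codec2::new(Codec2Mode::MODE_2400);
    /// let packed = vec![0u8; (c2.bits_per_frame() + 7) / 8]; // 48 bits = 6 bytes
    /// let mut samps = vec![0i16; c2.samples_per_frame()];
    /// c2.decode(&mut samps, &packed);
    /// ```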
    pub fn decode(&mut self, speech: &mut [i16], bits: &[u8]) {
        match self.internal.mode {
            MODE_3200 => self.codec2_decode_3200(speech, bits),
            MODE_2400 => self.codec2_decode_2400(speech, bits),
            MODE_1600 => self.codec2_decode_1600(speech, bits),
            MODE_1400 => self.codec2_decode_1400(speech, bits),
            MODE_1300 => self.codec2_decode_1300(speech, bits, 0.0),
            MODE_1200 => self.codec2_decode_1200(speech, bits),
        }
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_encode_3200
      AUTHOR......: David Rowe, conversion by Matt Weeks
      DATE CREATED: 13 Sep 2012

      Encodes 160 speech samples (20ms of speech) into 64 bits.

      The codec2 algorithm actually operates internally on 10ms (80
      sample) frames, so we run the encoding algorithm twice. On the
      first frame we just send the voicing bit. On the second frame we
      send all model parameters. Compared to 2400 we use a larger number
      of bits for the LSPs and non-VQ pitch and energy.

      The bit allocation is:

        Parameter                   bits/frame
        --------------------------------------
        Harmonic magnitudes (LSPs)      50
        Pitch (Wo)                       7
        Energy                           5
        Voicing (10ms update)            2
        TOTAL                           64

    \*---------------------------------------------------------------------------*/
    /// Encodes 160 speech samples (20ms of speech) into 64 bits.
    fn codec2_encode_3200(&mut self, bits: &mut [u8], speech: &[i16]) {
        let mut model = MODEL::new(self.internal.c2const.p_max as f32);
        let mut ak = [0.0; LPC_ORD + 1]; //f32
        let mut lsps = [0.0; LPC_ORD]; //f32
        let mut lspd_indexes = [0; LPC_ORD];
        let mut nbit = 0;

        let nbyte = (self.bits_per_frame() + 7) / 8;
        for i in 0..nbyte {
            bits[i] = 0;
        }

        // first 10ms analysis frame - we just want voicing
        self.analyse_one_frame(&mut model, speech);
        pack(bits, &mut nbit, model.voiced, 1);

        // second 10ms analysis frame
        self.analyse_one_frame(&mut model, &speech[self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);
        let Wo_index = encode_Wo(&self.internal.c2const, model.Wo, WO_BITS);
        pack(bits, &mut nbit, Wo_index, WO_BITS as u32); // 1+1+7 = 9 bits

        let e = speech_to_uq_lsps(
            &mut lsps,
            &mut ak,
            &self.internal.Sn,
            &self.internal.w,
            self.internal.m_pitch,
            LPC_ORD,
        );
        let e_index = encode_energy(e, E_BITS);
        pack(bits, &mut nbit, e_index, E_BITS as u32); // 9+5 = 14 bits

        encode_lspds_scalar(&mut lspd_indexes, &lsps, LPC_ORD);
        for i in 0..LSPD_SCALAR_INDEXES {
            pack(bits, &mut nbit, lspd_indexes[i], lspd_bits(i) as u32);
        }
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_decode_3200
      AUTHOR......: David Rowe, conversion by Matt Weeks
      DATE CREATED: 13 Sep 2012

      Decodes a frame of 64 bits into 160 samples (20ms) of speech.

    \*---------------------------------------------------------------------------*/
    /// Decodes a frame of 64 bits into 160 samples (20ms) of speech.
    fn codec2_decode_3200(&mut self, speech: &mut [i16], bits: &[u8]) {
        let mut ak = [[0.0; LPC_ORD + 1]; 2];
        let mut nbit = 0;
        let mut Aw = [COMP::new(); FFT_ENC];

        let mut model = [MODEL::new(self.internal.c2const.p_max as f32); 2];

        // unpack bits from channel ------------------------------------

        /* this will partially fill the model params for the 2 x 10ms
        frames */

        model[0].voiced = unpack(bits, &mut nbit, 1);
        model[1].voiced = unpack(bits, &mut nbit, 1);

        let Wo_index = unpack(bits, &mut nbit, WO_BITS as u32);
        model[1].Wo = decode_Wo(&self.internal.c2const, Wo_index, WO_BITS);
        model[1].L = (PI / model[1].Wo as f64) as usize;

        let mut e = [0.0; 2];
        let e_index = unpack(bits, &mut nbit, E_BITS as u32);
        e[1] = decode_energy(e_index, E_BITS);

        let mut lspd_indexes = [0; LPC_ORD];
        for i in 0..LSPD_SCALAR_INDEXES {
            lspd_indexes[i] = unpack(bits, &mut nbit, lspd_bits(i) as u32) as usize;
        }
        let mut lsps = [[0.0; LPC_ORD]; 2];
        decode_lspds_scalar(&mut lsps[1][0..], &lspd_indexes, LPC_ORD);

        // interpolate ------------------------------------------------

        /* Wo and energy are sampled every 20ms, so we interpolate just 1
        10ms frame between 20ms samples */

        model[0] = interp_Wo(
            &model[0],
            &self.internal.prev_model_dec,
            &model[1],
            self.internal.c2const.Wo_min,
        );
        e[0] = interp_energy(self.internal.prev_e_dec, e[1]);

        /* LSPs are sampled every 20ms so we interpolate the frame in
        between, then recover spectral amplitudes */

        let (lsps0, lsps1) = lsps.split_at_mut(1);
        interpolate_lsp_ver2(
            &mut lsps0[0][0..],
            &self.internal.prev_lsps_dec,
            &mut lsps1[0][0..],
            0.5,
            LPC_ORD,
        );

        for i in 0..2 {
            lsp_to_lpc(&lsps[i][0..], &mut ak[i][0..], LPC_ORD);
            let mut snr = 0.0;
            aks_to_M2(
                &mut self.internal.fftr_fwd_cfg,
                &ak[i][..],
                LPC_ORD,
                &mut model[i],
                e[i],
                &mut snr,
                0,
                0,
                self.internal.lpc_pf,
                self.internal.bass_boost,
                self.internal.beta,
                self.internal.gamma,
                &mut Aw,
            );
            apply_lpc_correction(&mut model[i]);
            self.synthesise_one_frame(
                &mut speech[self.internal.n_samp * i..],
                &mut model[i],
                &mut Aw,
                1.0,
            );
        }

        // update memories for next frame ----------------------------

        self.internal.prev_model_dec = model[1];
        self.internal.prev_e_dec = e[1];
        for i in 0..LPC_ORD {
            self.internal.prev_lsps_dec[i] = lsps[1][i];
        }
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_encode_2400
      AUTHOR......: David Rowe
      DATE CREATED: 21/8/2010

      Encodes 160 speech samples (20ms of speech) into 48 bits.

      The codec2 algorithm actually operates internally on 10ms (80
      sample) frames, so we run the encoding algorithm twice. On the
      first frame we just send the voicing bit. On the second frame we
      send all model parameters.

      The bit allocation is:

        Parameter                   bits/frame
        --------------------------------------
        Harmonic magnitudes (LSPs)      36
        Joint VQ of Energy and Wo        8
        Voicing (10ms update)            2
        Spare                            2
        TOTAL                           48

    \*---------------------------------------------------------------------------*/
    /// Encodes 160 speech samples (20ms of speech) into 48 bits.
    fn codec2_encode_2400(&mut self, bits: &mut [u8], speech: &[i16]) {
        let mut model = MODEL::new(self.internal.c2const.p_max as f32);
        let mut ak = [0.0; LPC_ORD + 1]; //f32
        let mut lsps = [0.0; LPC_ORD]; //f32
        let mut lsp_indexes = [0; LPC_ORD];
        let mut nbit = 0;

        for i in 0..(self.bits_per_frame() + 7) / 8 {
            bits[i] = 0;
        }

        // first 10ms analysis frame - we just want voicing
        self.analyse_one_frame(&mut model, speech);
        pack(bits, &mut nbit, model.voiced, 1);

        // second 10ms analysis frame
        self.analyse_one_frame(&mut model, &speech[self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);
        let e = speech_to_uq_lsps(
            &mut lsps,
            &mut ak,
            &self.internal.Sn,
            &self.internal.w,
            self.internal.m_pitch,
            LPC_ORD,
        );

        let WoE_index = encode_WoE(&model, e, &mut self.internal.xq_enc);
        pack(bits, &mut nbit, WoE_index, WO_E_BITS);

        encode_lsps_scalar(&mut lsp_indexes, &lsps, LPC_ORD);
        for i in 0..LSP_SCALAR_INDEXES {
            pack(bits, &mut nbit, lsp_indexes[i], lsp_bits(i));
        }
        let spare = 0;
        pack(bits, &mut nbit, spare, 2);

        //assert(nbit == (unsigned)codec2_bits_per_frame(c2));
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_decode_2400
      AUTHOR......: David Rowe
      DATE CREATED: 21/8/2010

      Decodes frames of 48 bits into 160 samples (20ms) of speech.

    \*---------------------------------------------------------------------------*/
    fn codec2_decode_2400(&mut self, speech: &mut [i16], bits: &[u8]) {
        let mut model = [MODEL::new(self.internal.c2const.p_max as f32); 2];
        let mut lsp_indexes = [0; LPC_ORD];
        let mut lsps = [[0.0; LPC_ORD]; 2];
        let mut e = [0.0; 2];
        let mut snr = 0.0;
        let mut ak = [[0.0; LPC_ORD + 1]; 2];
        let mut nbit = 0;
        let mut Aw = [COMP::new(); FFT_ENC];

        /* unpack bits from channel ------------------------------------*/

        /* this will partially fill the model params for the 2 x 10ms
        frames */
        model[0].voiced = unpack(bits, &mut nbit, 1);
        model[1].voiced = unpack(bits, &mut nbit, 1);

        let WoE_index = unpack(bits, &mut nbit, WO_E_BITS) as usize;
        decode_WoE(
            &self.internal.c2const,
            &mut model[1],
            &mut e[1],
            &mut self.internal.xq_dec,
            WoE_index,
        );

        for i in 0..LSP_SCALAR_INDEXES {
            lsp_indexes[i] = unpack(bits, &mut nbit, lsp_bits(i)) as usize;
        }
        decode_lsps_scalar(&mut lsps[1][0..], &lsp_indexes, LPC_ORD);
        check_lsp_order(&mut lsps[1][0..], LPC_ORD);
        bw_expand_lsps(&mut lsps[1][0..], LPC_ORD, 50.0, 100.0);

        /* interpolate ------------------------------------------------*/

        /* Wo and energy are sampled every 20ms, so we interpolate just 1
        10ms frame between 20ms samples */

        model[0] = interp_Wo(
            &model[0],
            &self.internal.prev_model_dec,
            &model[1],
            self.internal.c2const.Wo_min,
        );
        e[0] = interp_energy(self.internal.prev_e_dec, e[1]);

        /* LSPs are sampled every 20ms so we interpolate the frame in
        between, then recover spectral amplitudes */

        let (lsps0, lsps1) = lsps.split_at_mut(1);
        interpolate_lsp_ver2(
            &mut lsps0[0][0..],
            &self.internal.prev_lsps_dec,
            &mut lsps1[0][0..],
            0.5,
            LPC_ORD,
        );

        for i in 0..2 {
            lsp_to_lpc(&lsps[i][0..], &mut ak[i][0..], LPC_ORD);
            aks_to_M2(
                &mut self.internal.fftr_fwd_cfg,
                &ak[i][..],
                LPC_ORD,
                &mut model[i],
                e[i],
                &mut snr,
                0,
                0,
                self.internal.lpc_pf,
                self.internal.bass_boost,
                self.internal.beta,
                self.internal.gamma,
                &mut Aw,
            );
            apply_lpc_correction(&mut model[i]);
            self.synthesise_one_frame(
                &mut speech[self.internal.n_samp * i..],
                &mut model[i],
                &mut Aw,
                1.0,
            );
        }

        /* update memories for next frame ----------------------------*/
        self.internal.prev_model_dec = model[1];
        self.internal.prev_e_dec = e[1];
        for i in 0..LPC_ORD {
            self.internal.prev_lsps_dec[i] = lsps[1][i];
        }
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_encode_1600
      AUTHOR......: David Rowe, conversion by Raphael Peters
      DATE CREATED: Feb 28 2013

      Encodes 320 speech samples (40ms of speech) into 64 bits.

      The codec2 algorithm actually operates internally on 10ms (80
      sample) frames, so we run the encoding algorithm 4 times:

        frame 1: voicing bit
        frame 2: voicing bit, Wo and E
        frame 3: voicing bit
        frame 4: voicing bit, Wo and E, scalar LSPs

      The bit allocation is:

        Parameter                   frame 2  frame 4  Total
        -------------------------------------------------------
        Harmonic magnitudes (LSPs)      0       36      36
        Pitch (Wo)                      7        7      14
        Energy                          5        5      10
        Voicing (10ms update)           2        2       4
        TOTAL                          14       50      64

    \*---------------------------------------------------------------------------*/
    /// Encodes 320 speech samples (40ms of speech) into 64 bits.
    fn codec2_encode_1600(&mut self, bits: &mut [u8], speech: &[i16]) {
        let mut model = MODEL::new(self.internal.c2const.p_max as f32);
        let mut ak = [0.0; LPC_ORD + 1]; //f32
        let mut lsps = [0.0; LPC_ORD]; //f32
        let mut lsp_indexes = [0; LPC_ORD];
        let mut nbit = 0;

        for i in 0..(self.bits_per_frame() + 7) / 8 {
            bits[i] = 0;
        }

        // frame 1: - voicing

        self.analyse_one_frame(&mut model, speech);
        pack(bits, &mut nbit, model.voiced, 1);

        // frame 2: - voicing, scalar Wo & E

        self.analyse_one_frame(&mut model, &speech[self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);

        let mut Wo_index = encode_Wo(&self.internal.c2const, model.Wo, WO_BITS);
        pack(bits, &mut nbit, Wo_index, WO_BITS as u32);

        // need to run this just to get LPC energy
        let mut e = speech_to_uq_lsps(
            &mut lsps,
            &mut ak,
            &self.internal.Sn,
            &self.internal.w,
            self.internal.m_pitch,
            LPC_ORD,
        );
        let mut e_index = encode_energy(e, E_BITS);
        pack(bits, &mut nbit, e_index, E_BITS as u32);

        // frame 3: - voicing

        self.analyse_one_frame(&mut model, &speech[2 * self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);

        // frame 4: - voicing, scalar Wo & E, scalar LSPs

        self.analyse_one_frame(&mut model, &speech[3 * self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);

        Wo_index = encode_Wo(&self.internal.c2const, model.Wo, WO_BITS);
        pack(bits, &mut nbit, Wo_index, WO_BITS as u32);

        e = speech_to_uq_lsps(
            &mut lsps,
            &mut ak,
            &self.internal.Sn,
            &self.internal.w,
            self.internal.m_pitch,
            LPC_ORD,
        );
        e_index = encode_energy(e, E_BITS);
        pack(bits, &mut nbit, e_index, E_BITS as u32);

        encode_lsps_scalar(&mut lsp_indexes, &lsps, LPC_ORD);
        for i in 0..LSP_SCALAR_INDEXES {
            pack(bits, &mut nbit, lsp_indexes[i], lsp_bits(i));
        }

        assert_eq!(nbit, self.bits_per_frame());
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_decode_1600
      AUTHOR......: David Rowe, conversion by Raphael Peters
      DATE CREATED: 11 May 2012

      Decodes frames of 64 bits into 320 samples (40ms) of speech.

    \*---------------------------------------------------------------------------*/
    fn codec2_decode_1600(&mut self, speech: &mut [i16], bits: &[u8]) {
        let mut model = [MODEL::new(self.internal.c2const.p_max as f32); 4];
        let mut lsp_indexes = [0; LPC_ORD];
        let mut lsps = [[0.0; LPC_ORD]; 4];
        let mut e = [0.0; 4];
        let mut snr = 0.0;
        let mut ak = [[0.0; LPC_ORD + 1]; 4];
        let mut nbit = 0;
        let mut Aw = [COMP::new(); FFT_ENC];

        /* unpack bits from channel ------------------------------------*/

        /* this will partially fill the model params for the 4 x 10ms
        frames */

        model[0].voiced = unpack(bits, &mut nbit, 1);

        model[1].voiced = unpack(bits, &mut nbit, 1);
        let Wo_index = unpack(bits, &mut nbit, WO_BITS as u32);
        model[1].Wo = decode_Wo(&self.internal.c2const, Wo_index, WO_BITS);
        model[1].L = (PI as f32 / model[1].Wo) as usize;

        let mut e_index = unpack(bits, &mut nbit, E_BITS as u32);
        e[1] = decode_energy(e_index, E_BITS);

        model[2].voiced = unpack(bits, &mut nbit, 1);

        model[3].voiced = unpack(bits, &mut nbit, 1);
        let Wo_index = unpack(bits, &mut nbit, WO_BITS as u32);
        model[3].Wo = decode_Wo(&self.internal.c2const, Wo_index, WO_BITS);
        model[3].L = (PI as f32 / model[3].Wo) as usize;

        e_index = unpack(bits, &mut nbit, E_BITS as u32);
        e[3] = decode_energy(e_index, E_BITS);

        for i in 0..LSP_SCALAR_INDEXES {
            lsp_indexes[i] = unpack(bits, &mut nbit, lsp_bits(i)) as usize;
        }
        decode_lsps_scalar(&mut lsps[3][0..], &lsp_indexes, LPC_ORD);
        check_lsp_order(&mut lsps[3][0..], LPC_ORD);
        bw_expand_lsps(&mut lsps[3][0..], LPC_ORD, 50.0, 100.0);

        /* interpolate ------------------------------------------------*/

        /* Wo and energy are sampled every 20ms, so we interpolate just 1
        10ms frame between 20ms samples */

        model[0] = interp_Wo(
            &model[0],
            &self.internal.prev_model_dec,
            &model[1],
            self.internal.c2const.Wo_min,
        );
        e[0] = interp_energy(self.internal.prev_e_dec, e[1]);
        model[2] = interp_Wo(
            &model[2],
            &model[1],
            &model[3],
            self.internal.c2const.Wo_min,
        );
        e[2] = interp_energy(e[1], e[3]);

        /* LSPs are sampled every 40ms so we interpolate the 3 frames in
        between, then recover spectral amplitudes */

        for i in 0..3 {
            let weight = 0.25 + i as f32 * 0.25;
            let lsps3 = lsps[3];
            interpolate_lsp_ver2(
                &mut lsps[i][0..],
                &self.internal.prev_lsps_dec,
                &lsps3[0..],
                weight,
                LPC_ORD,
            );
        }
        for i in 0..4 {
            lsp_to_lpc(&lsps[i][0..], &mut ak[i][0..], LPC_ORD);
            aks_to_M2(
                &mut self.internal.fftr_fwd_cfg,
                &ak[i][0..],
                LPC_ORD,
                &mut model[i],
                e[i],
                &mut snr,
                0,
                0,
                self.internal.lpc_pf,
                self.internal.bass_boost,
                self.internal.beta,
                self.internal.gamma,
                &mut Aw,
            );
            apply_lpc_correction(&mut model[i]);
            self.synthesise_one_frame(
                &mut speech[self.internal.n_samp * i..],
                &mut model[i],
                &Aw,
                1.0,
            );
        }

        /* update memories for next frame ----------------------------*/

        self.internal.prev_model_dec = model[3];
        self.internal.prev_e_dec = e[3];
        for i in 0..LPC_ORD {
            self.internal.prev_lsps_dec[i] = lsps[3][i];
        }
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_encode_1400
      AUTHOR......: David Rowe, conversion by Raphael Peters
      DATE CREATED: May 11 2012

      Encodes 320 speech samples (40ms of speech) into 56 bits.

      The codec2 algorithm actually operates internally on 10ms (80
      sample) frames, so we run the encoding algorithm 4 times:

        frame 1: voicing bit
        frame 2: voicing bit, joint VQ of Wo and E
        frame 3: voicing bit
        frame 4: voicing bit, joint VQ of Wo and E, scalar LSPs

      The bit allocation is:

        Parameter                   frame 2  frame 4  Total
        -------------------------------------------------------
        Harmonic magnitudes (LSPs)      0       36      36
        Energy+Wo                       8        8      16
        Voicing (10ms update)           2        2       4
        TOTAL                          10       46      56

    \*---------------------------------------------------------------------------*/
    fn codec2_encode_1400(&mut self, bits: &mut [u8], speech: &[i16]) {
        let mut model = MODEL::new(self.internal.c2const.p_max as f32);
        let mut lsps = [0.0; LPC_ORD]; //f32
        let mut ak = [0.0; LPC_ORD + 1]; //f32
        let mut lsp_indexes = [0; LPC_ORD];
        let mut nbit = 0;

        let nbyte = (self.bits_per_frame() + 7) / 8;
        for i in 0..nbyte {
            bits[i] = 0;
        }

        // frame 1: - voicing

        self.analyse_one_frame(&mut model, speech);
        pack(bits, &mut nbit, model.voiced, 1);

        // frame 2: - voicing, joint Wo & E

        self.analyse_one_frame(&mut model, &speech[self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);

        // need to run this just to get LPC energy
        let e = speech_to_uq_lsps(
            &mut lsps,
            &mut ak,
            &self.internal.Sn,
            &self.internal.w,
            self.internal.m_pitch,
            LPC_ORD,
        );

        let WoE_index = encode_WoE(&model, e, &mut self.internal.xq_enc);
        pack(bits, &mut nbit, WoE_index, WO_E_BITS);

        // frame 3: - voicing

        self.analyse_one_frame(&mut model, &speech[2 * self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);

        // frame 4: - voicing, joint Wo & E, scalar LSPs

        self.analyse_one_frame(&mut model, &speech[3 * self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);

        let e = speech_to_uq_lsps(
            &mut lsps,
            &mut ak,
            &self.internal.Sn,
            &self.internal.w,
            self.internal.m_pitch,
            LPC_ORD,
        );
        let WoE_index = encode_WoE(&model, e, &mut self.internal.xq_enc);
        pack(bits, &mut nbit, WoE_index, WO_E_BITS);

        encode_lsps_scalar(&mut lsp_indexes, &lsps, LPC_ORD);
        for i in 0..LSP_SCALAR_INDEXES {
            pack(bits, &mut nbit, lsp_indexes[i], lsp_bits(i));
        }

        assert_eq!(nbit, self.bits_per_frame());
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_decode_1400
      AUTHOR......: David Rowe, conversion by Raphael Peters
      DATE CREATED: 11 May 2012

      Decodes frames of 56 bits into 320 samples (40ms) of speech.

    \*---------------------------------------------------------------------------*/
    fn codec2_decode_1400(&mut self, speech: &mut [i16], bits: &[u8]) {
        let mut model = [MODEL::new(self.internal.c2const.p_max as f32); 4];
        let mut lsp_indexes = [0; LPC_ORD];
        let mut lsps = [[0.0; LPC_ORD]; 4];
        let mut e = [0.0; 4];
        let mut snr = 0.0;
        let mut ak = [[0.0; LPC_ORD + 1]; 4];
        let mut nbit = 0;
        let mut Aw = [COMP::new(); FFT_ENC];

        /* unpack bits from channel ------------------------------------*/

        /* this will partially fill the model params for the 4 x 10ms
        frames */

        model[0].voiced = unpack(bits, &mut nbit, 1);

        model[1].voiced = unpack(bits, &mut nbit, 1);
        let WoE_index = unpack(bits, &mut nbit, WO_E_BITS);
        decode_WoE(
            &self.internal.c2const,
            &mut model[1],
            &mut e[1],
            &mut self.internal.xq_dec,
            WoE_index as usize,
        );

        model[2].voiced = unpack(bits, &mut nbit, 1);

        model[3].voiced = unpack(bits, &mut nbit, 1);
        let WoE_index = unpack(bits, &mut nbit, WO_E_BITS);
        decode_WoE(
            &self.internal.c2const,
            &mut model[3],
            &mut e[3],
            &mut self.internal.xq_dec,
            WoE_index as usize,
        );

        for i in 0..LSP_SCALAR_INDEXES {
            lsp_indexes[i] = unpack(bits, &mut nbit, lsp_bits(i)) as usize;
        }
        decode_lsps_scalar(&mut lsps[3][0..], &lsp_indexes, LPC_ORD);
        check_lsp_order(&mut lsps[3][0..], LPC_ORD);
        bw_expand_lsps(&mut lsps[3][0..], LPC_ORD, 50.0, 100.0);

        // interpolate

        /* Wo and energy are sampled every 20ms, so we interpolate just 1
        10ms frame between 20ms samples */

        model[0] = interp_Wo(
            &model[0],
            &self.internal.prev_model_dec,
            &model[1],
            self.internal.c2const.Wo_min,
        );
        e[0] = interp_energy(self.internal.prev_e_dec, e[1]);
        model[2] = interp_Wo(
            &model[2],
            &model[1],
            &model[3],
            self.internal.c2const.Wo_min,
        );
        e[2] = interp_energy(e[1], e[3]);

        /* LSPs are sampled every 40ms so we interpolate the 3 frames in
        between, then recover spectral amplitudes */

        for i in 0..3 {
            let weight = 0.25 + i as f32 * 0.25;
            let lsps3 = lsps[3];
            interpolate_lsp_ver2(
                &mut lsps[i][0..],
                &self.internal.prev_lsps_dec,
                &lsps3[0..],
                weight,
                LPC_ORD,
            );
        }
        for i in 0..4 {
            lsp_to_lpc(&lsps[i][0..], &mut ak[i][0..], LPC_ORD);
            aks_to_M2(
                &mut self.internal.fftr_fwd_cfg,
                &ak[i][0..],
                LPC_ORD,
                &mut model[i],
                e[i],
                &mut snr,
                0,
                0,
                self.internal.lpc_pf,
                self.internal.bass_boost,
                self.internal.beta,
                self.internal.gamma,
                &mut Aw,
            );
            apply_lpc_correction(&mut model[i]);
            self.synthesise_one_frame(
                &mut speech[self.internal.n_samp * i..],
                &mut model[i],
                &Aw,
                1.0,
            );
        }

        /* update memories for next frame ----------------------------*/

        self.internal.prev_model_dec = model[3];
        self.internal.prev_e_dec = e[3];
        for i in 0..LPC_ORD {
            self.internal.prev_lsps_dec[i] = lsps[3][i];
        }
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_encode_1300
      AUTHOR......: David Rowe, conversion by Raphael Peters
      DATE CREATED: March 14 2013

      Encodes 320 speech samples (40ms of speech) into 52 bits.

      The codec2 algorithm actually operates internally on 10ms (80
      sample) frames, so we run the encoding algorithm 4 times:

        frame 1: voicing bit
        frame 2: voicing bit
        frame 3: voicing bit
        frame 4: voicing bit, Wo and E, scalar LSPs

      The bit allocation is:

        Parameter                   frame 2  frame 4  Total
        -------------------------------------------------------
        Harmonic magnitudes (LSPs)      0       36      36
        Pitch (Wo)                      0        7       7
        Energy                          0        5       5
        Voicing (10ms update)           2        2       4
        TOTAL                           2       50      52

    \*---------------------------------------------------------------------------*/
    fn codec2_encode_1300(&mut self, bits: &mut [u8], speech: &[i16]) {
        let mut model = MODEL::new(self.internal.c2const.p_max as f32);
        let mut lsps = [0.0; LPC_ORD]; //f32
        let mut ak = [0.0; LPC_ORD + 1]; //f32
        let mut lsp_indexes = [0; LPC_ORD];
        let mut nbit = 0;

        let nbyte = (self.bits_per_frame() + 7) / 8;
        for i in 0..nbyte {
            bits[i] = 0;
        }

        // frame 1: - voicing

        self.analyse_one_frame(&mut model, speech);
        pack_natural_or_gray(bits, &mut nbit, model.voiced, 1, self.internal.gray as u32);

        /* frame 2: - voicing ---------------------------------------------*/

        self.analyse_one_frame(&mut model, &speech[self.internal.n_samp..]);
        pack_natural_or_gray(bits, &mut nbit, model.voiced, 1, self.internal.gray as u32);

        // frame 3: - voicing

        self.analyse_one_frame(&mut model, &speech[2 * self.internal.n_samp..]);
        pack_natural_or_gray(bits, &mut nbit, model.voiced, 1, self.internal.gray as u32);

        // frame 4: - voicing, scalar Wo & E, scalar LSPs

        self.analyse_one_frame(&mut model, &speech[3 * self.internal.n_samp..]);
        pack_natural_or_gray(bits, &mut nbit, model.voiced, 1, self.internal.gray as u32);

        let Wo_index = encode_Wo(&self.internal.c2const, model.Wo, WO_BITS);
        pack_natural_or_gray(
            bits,
            &mut nbit,
            Wo_index,
            WO_BITS as u32,
            self.internal.gray as u32,
        );

        let e = speech_to_uq_lsps(
            &mut lsps,
            &mut ak,
            &self.internal.Sn,
            &self.internal.w,
            self.internal.m_pitch,
            LPC_ORD,
        );
        let e_index = encode_energy(e, E_BITS);
        pack_natural_or_gray(
            bits,
            &mut nbit,
            e_index,
            E_BITS as u32,
            self.internal.gray as u32,
        );

        encode_lsps_scalar(&mut lsp_indexes, &lsps, LPC_ORD);
        for i in 0..LSP_SCALAR_INDEXES {
            pack_natural_or_gray(
                bits,
                &mut nbit,
                lsp_indexes[i],
                lsp_bits(i),
                self.internal.gray as u32,
            );
        }

        assert_eq!(nbit, self.bits_per_frame());
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_decode_1300
      AUTHOR......: David Rowe, conversion by Raphael Peters
      DATE CREATED: 11 May 2012

      Decodes frames of 52 bits into 320 samples (40ms) of speech.

    \*---------------------------------------------------------------------------*/
    fn codec2_decode_1300(&mut self, speech: &mut [i16], bits: &[u8], ber_est: f32) {
        let mut model = [MODEL::new(self.internal.c2const.p_max as f32); 4];
        let mut lsp_indexes = [0; LPC_ORD];
        let mut lsps = [[0.0; LPC_ORD]; 4];
        let mut e = [0.0; 4];
        let mut snr = 0.0;
        let mut ak = [[0.0; LPC_ORD + 1]; 4];
        let mut nbit = 0;
        let mut Aw = [COMP::new(); FFT_ENC];

        /* unpack bits from channel ------------------------------------*/

        /* this will partially fill the model params for the 4 x 10ms
        frames */

        model[0].voiced = unpack_natural_or_gray(bits, &mut nbit, 1, self.internal.gray as u32);
        model[1].voiced = unpack_natural_or_gray(bits, &mut nbit, 1, self.internal.gray as u32);
        model[2].voiced = unpack_natural_or_gray(bits, &mut nbit, 1, self.internal.gray as u32);
        model[3].voiced = unpack_natural_or_gray(bits, &mut nbit, 1, self.internal.gray as u32);

        let Wo_index =
            unpack_natural_or_gray(bits, &mut nbit, WO_BITS as u32, self.internal.gray as u32);
        model[3].Wo = decode_Wo(&self.internal.c2const, Wo_index, WO_BITS);
        model[3].L = (PI as f32 / model[3].Wo) as usize;

        let e_index =
            unpack_natural_or_gray(bits, &mut nbit, E_BITS as u32, self.internal.gray as u32);
        e[3] = decode_energy(e_index, E_BITS);

        for i in 0..LSP_SCALAR_INDEXES {
            lsp_indexes[i] =
                unpack_natural_or_gray(bits, &mut nbit, lsp_bits(i), self.internal.gray as u32)
                    as usize;
        }
        decode_lsps_scalar(&mut lsps[3][0..], &lsp_indexes, LPC_ORD);
        check_lsp_order(&mut lsps[3][0..], LPC_ORD);
        bw_expand_lsps(&mut lsps[3][0..], LPC_ORD, 50.0, 100.0);

        if ber_est > 0.15 {
            model[0].voiced = 0;
            model[1].voiced = 0;
            model[2].voiced = 0;
            model[3].voiced = 0;
            e[3] = decode_energy(10, E_BITS);
            bw_expand_lsps(&mut lsps[3][0..], LPC_ORD, 200.0, 200.0);
            //fprintf(stderr, "soft mute\n");
        }

        // interpolate

        /* Wo, energy, and LSPs are sampled every 40ms so we interpolate
        the 3 frames in between */

        for i in 0..3 {
            let weight = 0.25 + i as f32 * 0.25;
            let lsps3 = lsps[3];
            interpolate_lsp_ver2(
                &mut lsps[i][0..],
                &self.internal.prev_lsps_dec,
                &lsps3[0..],
                weight,
                LPC_ORD,
            );
            model[i] = interp_Wo2(
                &model[i],
                &self.internal.prev_model_dec,
                &model[3],
                weight,
                self.internal.c2const.Wo_min,
            );
            e[i] = interp_energy2(self.internal.prev_e_dec, e[3], weight);
        }

        /* then recover spectral amplitudes */

        for i in 0..4 {
            lsp_to_lpc(&lsps[i][0..], &mut ak[i][0..], LPC_ORD);
            aks_to_M2(
                &mut self.internal.fftr_fwd_cfg,
                &ak[i][0..],
                LPC_ORD,
                &mut model[i],
                e[i],
                &mut snr,
                0,
                0,
                self.internal.lpc_pf,
                self.internal.bass_boost,
                self.internal.beta,
                self.internal.gamma,
                &mut Aw,
            );
            apply_lpc_correction(&mut model[i]);
            self.synthesise_one_frame(
                &mut speech[self.internal.n_samp * i..],
                &mut model[i],
                &Aw,
                1.0,
            );

            /* dump parameters for deep learning experiments */
            /*
            if (c2 -> fmlfeat != NULL) {
                /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */
                fwrite(&lsps[i][0], LPC_ORD, sizeof(float), self.internal.fmlfeat);
                fwrite(&e[i], 1, sizeof(float), self.internal.fmlfeat);
                fwrite(&model[i].Wo, 1, sizeof(float), self.internal.fmlfeat);
                float voiced_float = model[i].voiced;
                fwrite(&voiced_float, 1, sizeof(float), self.internal.fmlfeat);
                fwrite(&ak[i][1], LPC_ORD, sizeof(float), self.internal.fmlfeat);
            }
            */
        }

        /*
        #ifdef DUMP
            dump_lsp_(&lsps[3][0]);
            dump_ak_(&ak[3][0], LPC_ORD);
        #endif
        */

        /* update memories for next frame ----------------------------*/

        self.internal.prev_model_dec = model[3];
        self.internal.prev_e_dec = e[3];
        for i in 0..LPC_ORD {
            self.internal.prev_lsps_dec[i] = lsps[3][i];
        }
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_encode_1200
      AUTHOR......: David Rowe, conversion by Raphael Peters
      DATE CREATED: Nov 14 2011

      Encodes 320 speech samples (40ms of speech) into 48 bits.

      The codec2 algorithm actually operates internally on 10ms (80
      sample) frames, so we run the encoding algorithm four times:

      frame 1: voicing bit
      frame 2: voicing bit, joint VQ of Wo and E
      frame 3: voicing bit
      frame 4: voicing bit, joint VQ of Wo and E, VQ LSPs

      The bit allocation is:

        Parameter                    frames 1-2  frames 3-4  Total
        -----------------------------------------------------------
        Harmonic magnitudes (LSPs)        0          27        27
        Energy+Wo                         8           8        16
        Voicing (10ms update)             2           2         4
        Spare                             0           1         1
        TOTAL                            10          38        48

    \*---------------------------------------------------------------------------*/
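    // Sanity check on the bit budget: 27 + 16 + 4 + 1 = 48 bits per
    // 320-sample (40ms) frame, i.e. 48 / 0.040s = 1200 bit/s, hence the
    // mode name.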
    fn codec2_encode_1200(&mut self, bits: &mut [u8], speech: &[i16]) {
        let mut model = MODEL::new(self.internal.c2const.p_max as f32);
        let mut lsps = [0.0; LPC_ORD]; //f32
        let mut lsps_ = [0.0; LPC_ORD]; //f32
        let mut ak = [0.0; LPC_ORD + 1]; //f32
        let mut lsp_indexes = [0; LPC_ORD];
        let spare = 0;
        let mut nbit = 0;

        let nbyte = (self.bits_per_frame() + 7) / 8;
        for i in 0..nbyte {
            bits[i] = 0;
        }

        // frame 1: - voicing

        self.analyse_one_frame(&mut model, speech);
        pack(bits, &mut nbit, model.voiced, 1);

        // frame 2: - voicing, joint Wo & E

        self.analyse_one_frame(&mut model, &speech[self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);

        // need to run this just to get LPC energy
        let e = speech_to_uq_lsps(
            &mut lsps,
            &mut ak,
            &self.internal.Sn,
            &self.internal.w,
            self.internal.m_pitch,
            LPC_ORD,
        );

        let WoE_index = encode_WoE(&model, e, &mut self.internal.xq_enc);
        pack(bits, &mut nbit, WoE_index, WO_E_BITS);

        // frame 3: - voicing

        self.analyse_one_frame(&mut model, &speech[2 * self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);

        // frame 4: - voicing, joint Wo & E, VQ LSPs

        self.analyse_one_frame(&mut model, &speech[3 * self.internal.n_samp..]);
        pack(bits, &mut nbit, model.voiced, 1);

        let e = speech_to_uq_lsps(
            &mut lsps,
            &mut ak,
            &self.internal.Sn,
            &self.internal.w,
            self.internal.m_pitch,
            LPC_ORD,
        );
        let WoE_index = encode_WoE(&model, e, &mut self.internal.xq_enc);
        pack(bits, &mut nbit, WoE_index, WO_E_BITS);

        encode_lsps_vq(&mut lsp_indexes, &mut lsps, &mut lsps_, LPC_ORD);
        for i in 0..LSP_PRED_VQ_INDEXES {
            pack(
                bits,
                &mut nbit,
                lsp_indexes[i] as i32,
                lsp_pred_vq_bits(i) as u32,
            );
        }
        pack(bits, &mut nbit, spare, 1);

        assert_eq!(nbit, self.bits_per_frame());
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: codec2_decode_1200
      AUTHOR......: David Rowe, conversion by Raphael Peters
      DATE CREATED: 14 Feb 2012

      Decodes frames of 48 bits into 320 samples (40ms) of speech.

    \*---------------------------------------------------------------------------*/

    fn codec2_decode_1200(&mut self, speech: &mut [i16], bits: &[u8]) {
        let mut model = [MODEL::new(self.internal.c2const.p_max as f32); 4];
        let mut lsp_indexes = [0; LPC_ORD];
        let mut lsps = [[0.0; LPC_ORD]; 4];
        let mut e = [0.0; 4];
        let mut snr = 0.0;
        let mut ak = [[0.0; LPC_ORD + 1]; 4];
        let mut nbit = 0;
        let mut Aw = [COMP::new(); FFT_ENC];

        // only need to zero these out due to (unused) snr calculation
        for i in 0..4 {
            for j in 1..MAX_AMP {
                model[i].A[j] = 0.0;
            }
        }

        /* unpack bits from channel ------------------------------------*/

        /* this will partially fill the model params for the 4 x 10ms
        frames */

        model[0].voiced = unpack(bits, &mut nbit, 1);

        model[1].voiced = unpack(bits, &mut nbit, 1);
        let WoE_index = unpack(bits, &mut nbit, WO_E_BITS);
        decode_WoE(
            &self.internal.c2const,
            &mut model[1],
            &mut e[1],
            &mut self.internal.xq_dec,
            WoE_index as usize,
        );

        model[2].voiced = unpack(bits, &mut nbit, 1);

        model[3].voiced = unpack(bits, &mut nbit, 1);
        let WoE_index = unpack(bits, &mut nbit, WO_E_BITS);
        decode_WoE(
            &self.internal.c2const,
            &mut model[3],
            &mut e[3],
            &mut self.internal.xq_dec,
            WoE_index as usize,
        );

        for i in 0..LSP_PRED_VQ_INDEXES {
            lsp_indexes[i] = unpack(bits, &mut nbit, lsp_pred_vq_bits(i) as u32) as usize;
        }
        decode_lsps_vq(&lsp_indexes, &mut lsps[3][0..], LPC_ORD, 0);
        check_lsp_order(&mut lsps[3][0..], LPC_ORD);
        bw_expand_lsps(&mut lsps[3][0..], LPC_ORD, 50.0, 100.0);

        // interpolate

        /* Wo and energy are sampled every 20ms, so we interpolate just 1
        10ms frame between 20ms samples */
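        /* Concretely: frame 0 is interpolated midway between the previous
        frame 3 and the newly decoded frame 1, and frame 2 midway between
        frames 1 and 3. */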
        model[0] = interp_Wo(
            &model[0],
            &self.internal.prev_model_dec,
            &model[1],
            self.internal.c2const.Wo_min,
        );
        e[0] = interp_energy(self.internal.prev_e_dec, e[1]);
        model[2] = interp_Wo(
            &model[2],
            &model[1],
            &model[3],
            self.internal.c2const.Wo_min,
        );
        e[2] = interp_energy(e[1], e[3]);

        /* LSPs are sampled every 40ms so we interpolate the 3 frames in
        between, then recover spectral amplitudes */

        for i in 0..3 {
            let weight = 0.25 + i as f32 * 0.25;
            let lsps3 = lsps[3];
            interpolate_lsp_ver2(
                &mut lsps[i][0..],
                &self.internal.prev_lsps_dec,
                &lsps3[0..],
                weight,
                LPC_ORD,
            );
        }
        for i in 0..4 {
            lsp_to_lpc(&lsps[i][0..], &mut ak[i][0..], LPC_ORD);
            aks_to_M2(
                &mut self.internal.fftr_fwd_cfg,
                &ak[i][0..],
                LPC_ORD,
                &mut model[i],
                e[i],
                &mut snr,
                0,
                0,
                self.internal.lpc_pf,
                self.internal.bass_boost,
                self.internal.beta,
                self.internal.gamma,
                &mut Aw,
            );
            apply_lpc_correction(&mut model[i]);
            self.synthesise_one_frame(
                &mut speech[self.internal.n_samp * i..],
                &mut model[i],
                &Aw,
                1.0,
            );
        }

        /* update memories for next frame ----------------------------*/

        self.internal.prev_model_dec = model[3];
        self.internal.prev_e_dec = e[3];
        for i in 0..LPC_ORD {
            self.internal.prev_lsps_dec[i] = lsps[3][i];
        }
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: analyse_one_frame()
      AUTHOR......: David Rowe, conversion by Matt Weeks
      DATE CREATED: 23/8/2010

      Extract sinusoidal model parameters from 80 speech samples (10ms of
      speech).

    \*---------------------------------------------------------------------------*/
    fn analyse_one_frame(&mut self, model: &mut MODEL, speech: &[i16]) {
        let mut Sw = [COMP::new(); FFT_ENC];
        let n_samp = self.internal.n_samp;
        let m_pitch = self.internal.m_pitch;

        // Read input speech
        for i in 0..m_pitch - n_samp {
            self.internal.Sn[i] = self.internal.Sn[i + n_samp];
        }
        for i in 0..n_samp {
            self.internal.Sn[i + m_pitch - n_samp] = speech[i].into();
        }
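        /* Sn[] is a sliding analysis buffer of m_pitch samples: each call
        shifts the history left by n_samp and appends the n_samp new input
        samples at the end, so pitch analysis sees more than one frame of
        context. */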
        self.internal.c2const.dft_speech(
            &self.internal.fft_fwd_cfg,
            &mut Sw,
            &self.internal.Sn,
            &self.internal.w,
        );

        // Estimate pitch
        let mut pitch = 0.0;
        nlp::nlp(
            &mut self.internal.nlp,
            &self.internal.Sn,
            n_samp,
            &mut pitch,
            &Sw,
            &self.internal.W,
            &mut self.internal.prev_f0_enc,
        );
        model.Wo = TWO_PI / pitch;
        model.L = (PI / model.Wo as f64) as usize;

        // refine the coarse NLP pitch estimate
        two_stage_pitch_refinement(&self.internal.c2const, model, &Sw);

        // estimate amplitudes and voicing; phase estimation (the last
        // argument to estimate_amplitudes) is only used for ML experiments
        // and is disabled here
        estimate_amplitudes(model, &Sw, &self.internal.W, 0);
        est_voicing_mbe(&self.internal.c2const, model, &Sw, &self.internal.W);
    }

    /*---------------------------------------------------------------------------*\

      FUNCTION....: synthesise_one_frame()
      AUTHOR......: David Rowe, conversion by Matt Weeks
      DATE CREATED: 23/8/2010

      Synthesise 80 speech samples (10ms) from model parameters.

    \*---------------------------------------------------------------------------*/
    fn synthesise_one_frame(
        &mut self,
        speech: &mut [i16],
        model: &mut MODEL,
        Aw: &[COMP],
        gain: f32,
    ) {
        // LPC based phase synthesis
        let mut H = [COMP::new(); MAX_AMP + 1];
        sample_phase(model, &mut H, Aw);
        phase_synth_zero_order(
            self.internal.n_samp,
            model,
            &mut self.internal.ex_phase,
            &mut H,
        );

        postfilter(model, &mut self.internal.bg_est);
        synthesise(
            self.internal.n_samp,
            &mut self.internal.fftr_inv_cfg,
            &mut self.internal.Sn_,
            model,
            &self.internal.Pn,
            true,
        );

        for i in 0..self.internal.n_samp {
            self.internal.Sn_[i] *= gain;
        }

        ear_protection(&mut self.internal.Sn_, self.internal.n_samp);

        // clamp the float samples to the i16 range before output
        for i in 0..self.internal.n_samp {
            if self.internal.Sn_[i] > 32767.0 {
                speech[i] = 32767;
            } else if self.internal.Sn_[i] < -32767.0 {
                speech[i] = -32767;
            } else {
                speech[i] = self.internal.Sn_[i] as i16;
            }
        }
    }
}

/*---------------------------------------------------------------------------*\

  FUNCTION....: ear_protection()
  AUTHOR......: David Rowe, conversion by Matt Weeks
  DATE CREATED: Nov 7 2012

  Limits output level to protect ears when there are bit errors or the input
  is overdriven. This doesn't correct or mask bit errors, just reduces the
  worst of their damage.

\*---------------------------------------------------------------------------*/
fn ear_protection(in_out: &mut [f32], n: usize) {
    // find maximum sample in frame

    let mut max_sample = 0.0;
    for i in 0..n {
        if in_out[i] > max_sample {
            max_sample = in_out[i];
        }
    }

    // determine how far above set point

    let over = max_sample / 30000.0;

    /* If we are x dB over set point we reduce level by 2x dB, this
    attenuates major excursions in amplitude (likely to be caused
    by bit errors) more than smaller ones */
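    /* The factor of two falls out of the squaring below: with the level
    x dB over the set point, over = 10^(x/20), so
    gain = 1/over^2 = 10^(-2x/20), i.e. an attenuation of 2x dB. */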

    if over > 1.0 {
        let gain = 1.0 / (over * over);
        for i in 0..n {
            in_out[i] *= gain;
        }
    }
}

/*---------------------------------------------------------------------------*\

  est_voicing_mbe()

  Returns the error of the MBE cost function for a given F0.

  Note: a lot of the operations below could be simplified, since W[] is
  real and has been normalised such that den always equals 1.

\*---------------------------------------------------------------------------*/
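/* In symbols: for each harmonic band l the code below fits a complex
   amplitude Am to the spectrum under the window W, accumulates

       error = sum_l sum_{m in band l} |Sw[m] - Am * W[offset + m]|^2

   and declares the frame voiced when snr = 10*log10(sig/error) exceeds
   V_THRESH. */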
fn est_voicing_mbe(c2const: &C2const, model: &mut MODEL, Sw: &[COMP], W: &[f32]) -> f32 {
    let mut Am = COMP::new(); // amplitude sample for this band
    let mut Ew = COMP::new();

    let l_1000hz = (model.L as f32 * 1000.0 / ((c2const.Fs / 2) as f32)) as usize;
    let mut sig = 1E-4;
    for l in 1..l_1000hz + 1 {
        sig += model.A[l] * model.A[l];
    }

    let Wo = model.Wo;
    let mut error = 1E-4; // accumulated error between original and synthesised

    // Just test across the harmonics in the first 1000 Hz

    for l in 1..l_1000hz + 1 {
        Am.r = 0.0;
        Am.i = 0.0;
        let mut den = 0.0; // denominator of Am expression
        let al = ((l as f32 - 0.5) * Wo * (FFT_ENC as f32) / TWO_PI).ceil() as usize;
        let bl = ((l as f32 + 0.5) * Wo * (FFT_ENC as f32) / TWO_PI).ceil() as usize;

        // Estimate amplitude of harmonic assuming harmonic is totally voiced

        // centers Hw[] about current harmonic
        let offset =
            (FFT_ENC as f32 / 2.0 - (l as f32) * Wo * (FFT_ENC as f32) / TWO_PI + 0.5) as usize;
        for m in al..bl {
            Am.r += Sw[m].r * W[offset + m];
            Am.i += Sw[m].i * W[offset + m];
            den += W[offset + m] * W[offset + m];
        }

        Am.r /= den;
        Am.i /= den;

        // Determine error between estimated harmonic and original

        for m in al..bl {
            Ew.r = Sw[m].r - Am.r * W[offset + m];
            Ew.i = Sw[m].i - Am.i * W[offset + m];
            error += Ew.r * Ew.r;
            error += Ew.i * Ew.i;
        }
    }

    let snr = 10.0 * (sig / error).log10();
    if snr > V_THRESH {
        model.voiced = 1;
    } else {
        model.voiced = 0;
    }

    // post processing, helps clean up some voicing errors ------------------

    /*
    Determine the ratio of low frequency to high frequency energy,
    voiced speech tends to be dominated by low frequency energy,
    unvoiced by high frequency. This measure can be used to
    determine if we have made any gross errors.
    */

    let l_2000hz = (model.L as f32 * 2000.0 / (c2const.Fs as f32 / 2.0)) as usize;
    let l_4000hz = (model.L as f32 * 4000.0 / (c2const.Fs as f32 / 2.0)) as usize;
    let mut ehigh = 1E-4;
    let mut elow = ehigh;
    for l in 1..l_2000hz + 1 {
        elow += model.A[l] * model.A[l];
    }
    for l in l_2000hz..l_4000hz + 1 {
        ehigh += model.A[l] * model.A[l];
    }
    let eratio = 10.0 * (elow / ehigh).log10();

    /* Look for Type 1 errors, strongly V speech that has been
    accidentally declared UV */

    if model.voiced == 0 && eratio > 10.0 {
        model.voiced = 1;
    }

    /* Look for Type 2 errors, strongly UV speech that has been
    accidentally declared V */

    if model.voiced == 1 {
        if eratio < -10.0 {
            model.voiced = 0;
        }

        /* A common source of Type 2 errors is the pitch estimator
        giving a low (50Hz) estimate for UV speech, which gives a
        good match with noise due to the close harmonic spacing.
        These errors are much more common than people with 50Hz
        pitch, so we use just a small eratio threshold. */

        let sixty = 60.0 * TWO_PI / (c2const.Fs as f32);
        if (eratio < -4.0) && (model.Wo <= sixty) {
            model.voiced = 0;
        }
    }

    snr
}

/*---------------------------------------------------------------------------*\

  FUNCTION....: estimate_amplitudes
  AUTHOR......: David Rowe, conversion by Matt Weeks
  DATE CREATED: 27/5/94

  Estimates the complex amplitudes of the harmonics.

\*---------------------------------------------------------------------------*/
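/* For each harmonic m, the DFT bins [am, bm) bracket the harmonic and the
   magnitude estimate is the square root of the energy in those bins:

       A_m = sqrt( sum_{i=am}^{bm-1} (Sw[i].r^2 + Sw[i].i^2) )
*/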
fn estimate_amplitudes(model: &mut MODEL, Sw: &[COMP], _W: &[f32], est_phase: i32) {
    let r = TWO_PI / (FFT_ENC as f32);
    let one_on_r = 1.0 / r;

    for m in 1..model.L + 1 {
        // Estimate amplitude of harmonic

        let mut den = 0.0; // denominator of amplitude expression
        // bounds of current harmonic
        let am = ((m as f32 - 0.5) * model.Wo * one_on_r + 0.5) as usize;
        let bm = ((m as f32 + 0.5) * model.Wo * one_on_r + 0.5) as usize;

        for i in am..bm {
            den += Sw[i].r * Sw[i].r + Sw[i].i * Sw[i].i;
        }

        model.A[m] = den.sqrt();

        if est_phase != 0 {
            // DFT bin at the centre of the current harmonic
            let b = (m as f32 * model.Wo / r + 0.5) as usize;

            /* Estimate phase of harmonic, this is expensive in CPU for
            embedded devices so we make it an option */

            model.phi[m] = Sw[b].i.atan2(Sw[b].r);
        }
    }
}

/*---------------------------------------------------------------------------*\

  FUNCTION....: two_stage_pitch_refinement
  AUTHOR......: David Rowe, conversion by Matt Weeks
  DATE CREATED: 27/5/94

  Refines the current pitch estimate using the harmonic sum pitch
  estimation technique.

\*---------------------------------------------------------------------------*/
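/* The search runs over the pitch period P = TWO_PI / Wo: a coarse pass over
   P +/- 5 samples in steps of 1 sample, then a fine pass over the coarse
   winner +/- 1 sample in steps of 0.25. */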
fn two_stage_pitch_refinement(c2const: &C2const, model: &mut MODEL, Sw: &[COMP]) {
    // Coarse refinement
    // pitch refinement minimum, maximum and step
    let mut pmax = TWO_PI / model.Wo + 5.0;
    let mut pmin = TWO_PI / model.Wo - 5.0;
    let mut pstep = 1.0;
    hs_pitch_refinement(model, Sw, pmin, pmax, pstep);

    // Fine refinement

    pmax = TWO_PI / model.Wo + 1.0;
    pmin = TWO_PI / model.Wo - 1.0;
    pstep = 0.25;
    hs_pitch_refinement(model, Sw, pmin, pmax, pstep);

    // Limit range

    if model.Wo < TWO_PI / (c2const.p_max as f32) {
        model.Wo = TWO_PI / (c2const.p_max as f32);
    }
    if model.Wo > TWO_PI / (c2const.p_min as f32) {
        model.Wo = TWO_PI / (c2const.p_min as f32);
    }

    model.L = (PI / model.Wo as f64).floor() as usize;

    // trap occasional round off issues with floor()
    if model.Wo * model.L as f32 >= 0.95 * PI as f32 {
        model.L -= 1;
    }
    // assert(model.Wo*model.L < PI);
}

/*---------------------------------------------------------------------------*\

  FUNCTION....: hs_pitch_refinement
  AUTHOR......: David Rowe, conversion by Matt Weeks
  DATE CREATED: 27/5/94

  Harmonic sum pitch refinement function.

  pmin   pitch search range minimum
  pmax   pitch search range maximum
  pstep  pitch search step size
  model  current pitch estimate in model.Wo

  model  refined pitch estimate in model.Wo

\*---------------------------------------------------------------------------*/
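/* The quantity being maximised is the harmonic sum

       E(Wo) = sum_{m=1}^{L} |Sw[round(m * Wo / r)]|^2,   Wo = TWO_PI / p

   evaluated at each candidate pitch p in [pmin, pmax] in steps of pstep. */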
fn hs_pitch_refinement(model: &mut MODEL, Sw: &[COMP], pmin: f32, pmax: f32, pstep: f32) {
    // Initialisation

    model.L = (PI / model.Wo as f64) as usize; // use initial pitch est. for L
    let mut Wom = model.Wo; // Wo that maximises E
    let mut Em = 0.0; // maximum energy
    let r = TWO_PI / FFT_ENC as f32; // number of rads/bin
    let one_on_r = 1.0 / r;

    // Determine harmonic sum for a range of Wo values
    let mut p = pmin; // current pitch
    while p <= pmax {
        let mut E = 0.0; // energy for current pitch
        let Wo = TWO_PI / p; // current "test" fundamental freq.

        // Sum harmonic magnitudes
        for m in 1..model.L + 1 {
            // bin for current harmonic centre
            let b = (m as f32 * Wo * one_on_r + 0.5) as usize;
            E += Sw[b].r * Sw[b].r + Sw[b].i * Sw[b].i;
        }

        // Compare to see if this is a maximum
        if E > Em {
            Em = E;
            Wom = Wo;
        }
        p += pstep;
    }

    model.Wo = Wom;
}