pulsejet_rs/
encode.rs

1use std::{collections::BTreeMap, f32::consts::PI};
2
3use crate::{
4    cmath::CMath,
5    common::*,
6    encode::helpers::{order0_bits_estimate, BAND_BIN_QUANTIZE_SCALE_BASES},
7};
8
9mod helpers {
10    use crate::common::NUM_BANDS;
11
12    use super::Map;
13
14    pub const BAND_BIN_QUANTIZE_SCALE_BASES: [u8; NUM_BANDS] = [
15        200, 200, 200, 200, 200, 200, 200, 200, 198, 193, 188, 183, 178, 173,
16        168, 163, 158, 153, 148, 129,
17    ];
18
19    pub fn order0_bits_estimate<Key>(freqs: &Map<Key, u32>) -> f64 {
20        let num_symbols: u32 = freqs.values().sum();
21        let mut bits_estimate: f64 = 0.0;
22        for &freq in freqs.values() {
23            let prob = freq as f64 / num_symbols as f64;
24            bits_estimate += -f64::log2(prob) * freq as f64;
25        }
26        bits_estimate
27    }
28}
29
30type Map<K, V> = BTreeMap<K, V>;
31
32#[non_exhaustive]
33pub struct EncodeResult {
34    /// Encoded sample stream.
35    pub stream: Vec<u8>,
36    /// Total bits estimate for the
37    /// encoded sample. This will typically differ slightly
38    /// from the actual size after compression, but on average
39    /// is accurate enough to be useful.
40    pub total_bits_estimate: f64,
41}
42
43/// Encodes a raw sample stream into a newly-allocated vector.
44///
45/// Like `Decode`, this function expects `CosF` and `SinF` to be defined
46/// by the user in the `Pulsejet::Shims` namespace before including the
47/// relevant pulsejet header(s). See the documentation for `Decode` for
48/// more information.
49///
50/// # Inputs
51///
52/// - `sample_stream` - Input sample stream.
53/// - `sample_rate` - Input sample rate in samples per second (hz).
54///   pulsejet is designed for 44100hz samples only, and its
55///   psychoacoustics are tuned to that rate. However, other rates
56///   may do something useful/interesting, so this rate is not
57///   enforced, and the encoder will happily try to match a target
58///   bit rate at another sample rate if desired.
59/// - `target_bit_rate` - Target bit rate in kilobits per second (kbps).
60///   There's no enforced lower/upper bound, but due to codec format
61///   details, the resulting bit rate will often plateau around
62///   128kbps (or lower, depending on the material). ~64kbps is
63///   typically transparent, ~32-64kbps is typically high quality.
64///   For anything lower, it depends on the material, but it's not
65///   uncommon for rates <=16kbps to actually be useful. <=0kbps
66///   will usually end up around 2-3kbps.
67pub fn encode<M: CMath>(
68    sample_stream: &[f32],
69    sample_rate: f64,
70    target_bit_rate: f64,
71) -> EncodeResult {
72    let mut v = vec![];
73    let mut total_bits_estimate = 0.0;
74
75    // Determine target bits/frame
76    let target_bits_per_frame =
77        target_bit_rate * 1000.0 * ((FRAME_SIZE as f64) / sample_rate);
78
79    // Write out tag+version number
80    v.extend_from_slice(&SAMPLE_TAG);
81    v.extend_from_slice(&CODEC_VERSION_MAJOR.to_le_bytes());
82    v.extend_from_slice(&CODEC_VERSION_MINOR.to_le_bytes());
83
84    // Determine and output number of frames
85    let mut num_frames =
86        (sample_stream.len() as u32 + FRAME_SIZE - 1) / FRAME_SIZE;
87    v.extend_from_slice(&(num_frames as u16).to_le_bytes());
88
89    // We're going to decode one more frame than we output, so adjust
90    // the frame count
91    num_frames += 1;
92
93    // Allocate internal sample buffer including padding, fill it with silence,
94    // and copy input data into it
95    let num_samples = num_frames * FRAME_SIZE;
96    let num_padded_samples = num_samples + FRAME_SIZE * 2;
97    let mut padded_samples = vec![0.0f32; num_padded_samples as usize];
98    padded_samples[FRAME_SIZE as usize..][..sample_stream.len()]
99        .copy_from_slice(sample_stream);
100
101    // Fill padding regions with mirrored frames from the original sample
102    for i in 0..FRAME_SIZE {
103        // Head padding
104        padded_samples[(FRAME_SIZE - 1 - i) as usize] =
105            padded_samples[(FRAME_SIZE + i) as usize];
106        // Tail padding
107        padded_samples[(num_padded_samples - FRAME_SIZE + i) as usize] =
108            padded_samples[(num_padded_samples - FRAME_SIZE - 1 - i) as usize];
109    }
110
111    // Allocate separate streams to group correlated data
112    let mut window_mode_stream: Vec<u8> = vec![];
113    let mut band_energy_stream: Vec<u8> = vec![];
114    let mut bin_qstream: Vec<i8> = vec![];
115
116    // Clear quantized band energy predictions
117    let mut quantized_band_energy_predictions = vec![0u8; NUM_BANDS];
118
119    // Clear slack bits
120    let mut slack_bits: f64 = 0.0;
121
122    // Build transient frame map
123    let mut is_transient_frame_map: Vec<bool> = vec![];
124    let mut last_frame_energy: f32 = 0.0f32;
125    for frame_index in 0..num_frames {
126        // Conceptually, frames are centered around the center of each
127        // long window
128        let frame_offset = FRAME_SIZE / 2 + frame_index * FRAME_SIZE;
129        let mut frame_energy: f32 = 0.0f32;
130        for i in 0..FRAME_SIZE {
131            let sample = padded_samples[(frame_offset + i) as usize];
132            frame_energy += sample * sample;
133        }
134        is_transient_frame_map.push(frame_energy >= last_frame_energy * 2.0f32);
135        last_frame_energy = frame_energy;
136    }
137
138    // Encode frames
139    for frame_index in 0..num_frames {
140        // Determine and output window mode for this frame
141        let is_transient_frame = is_transient_frame_map[frame_index as usize];
142        let mut window_mode: WindowMode = WindowMode::Long;
143        if target_bit_rate > 8.0 {
144            let is_prev_frame_transient_frame = frame_index > 0
145                && is_transient_frame_map[(frame_index - 1) as usize];
146            let is_next_frame_transient_frame = frame_index < num_frames - 1
147                && is_transient_frame_map[(frame_index + 1) as usize];
148            if is_transient_frame
149                || (is_prev_frame_transient_frame
150                    && is_next_frame_transient_frame)
151            {
152                window_mode = WindowMode::Short;
153            } else if is_next_frame_transient_frame {
154                window_mode = WindowMode::Start;
155            } else if is_prev_frame_transient_frame {
156                window_mode = WindowMode::Stop;
157            }
158        }
159        window_mode_stream.push(window_mode as u8);
160
161        // Determine subframe configuration from window mode
162        let mut num_subframes: u32 = 1;
163        let mut subframe_window_offset: u32 = 0;
164        let mut subframe_window_size: u32 = LONG_WINDOW_SIZE;
165        if window_mode == WindowMode::Short {
166            num_subframes = NUM_SHORT_WINDOWS_PER_FRAME;
167            subframe_window_offset =
168                LONG_WINDOW_SIZE / 4 - SHORT_WINDOW_SIZE / 4;
169            subframe_window_size = SHORT_WINDOW_SIZE;
170        }
171        let subframe_size = subframe_window_size / 2;
172
173        let target_bits_per_subframe =
174            target_bits_per_frame / (num_subframes as f64);
175
176        // Encode subframe(s)
177        for subframe_index in 0..num_subframes {
178            let mut window_bins: Vec<f32> = vec![];
179            window_bins.reserve(subframe_size as usize);
180            {
181                // Apply window
182                let frame_offset = frame_index * FRAME_SIZE;
183                let window_offset =
184                    subframe_window_offset + subframe_index * subframe_size;
185                let mut windowed_samples: Vec<f32> = vec![];
186                windowed_samples.reserve(subframe_window_size as usize);
187                for n in 0..subframe_window_size {
188                    let sample = padded_samples
189                        [(frame_offset + window_offset + n) as usize];
190                    let window =
191                        mdct_window(n, subframe_window_size, window_mode);
192                    windowed_samples.push(sample * window);
193                }
194
195                // Perform MDCT
196                for k in 0..subframe_size {
197                    let mut bin: f32 = 0.0;
198                    for n in 0..subframe_window_size {
199                        bin += windowed_samples[n as usize]
200                            * M::cos(
201                                PI / (subframe_size as f32)
202                                    * (n as f32
203                                        + 0.5
204                                        + (subframe_size / 2) as f32)
205                                    * (k as f32 + 0.5),
206                            );
207                    }
208                    window_bins.push(bin);
209                }
210            }
211
212            // Search (exhaustively) for an appropriate bin quantization
213            // scaling factor
214            let mut best_quantized_band_energies: Vec<u8> = vec![];
215            let mut best_band_energy_stream: Vec<u8> = vec![];
216            let mut best_bin_qstream: Vec<i8> = vec![];
217            let mut best_subframe_bits_estimate: f64 = 0.0;
218
219            let min_scaling_factor: u32 = 1;
220            let max_scaling_factor: u32 = 500;
221            for scaling_factor in min_scaling_factor..=max_scaling_factor {
222                let mut candidate_quantized_band_energies: Vec<u8> = vec![];
223                let mut candidate_band_energy_stream: Vec<u8> = vec![];
224                candidate_quantized_band_energies.reserve(NUM_BANDS);
225                candidate_band_energy_stream.reserve(NUM_BANDS);
226                let mut candidate_band_energy_freqs: Map<u8, u32> =
227                    Default::default();
228                let mut candidate_bin_qstream: Vec<i8> = vec![];
229                candidate_bin_qstream.reserve(subframe_size as usize);
230                let mut candidate_bin_qfreqs: Map<i8, u32> = Default::default();
231
232                // Encode bands
233                let mut band_bins = &window_bins[..];
234                for band_index in 0..NUM_BANDS {
235                    let num_bins =
236                        BAND_TO_NUM_BINS[band_index] as u32 / num_subframes;
237
238                    // Calculate band energy
239                    let epsilon: f32 = 1e-27;
240                    let mut band_energy: f32 = epsilon;
241                    for bin_index in 0..num_bins {
242                        let bin = band_bins[bin_index as usize];
243                        band_energy += bin * bin;
244                    }
245                    band_energy = M::sqrt(band_energy);
246
247                    // Quantize and encode band energy
248                    let linear_band_energy = (f32::clamp(
249                        f32::log2(band_energy / num_bins as f32),
250                        -20.0f32,
251                        20.0f32,
252                    ) + 20.0f32)
253                        / 40.0f32;
254                    let quantized_band_energy =
255                        (f32::round(linear_band_energy * 64.0f32)) as u8;
256                    candidate_quantized_band_energies
257                        .push(quantized_band_energy);
258                    let quantized_band_energy_residual: u8 =
259                        quantized_band_energy
260                            - quantized_band_energy_predictions[band_index];
261                    candidate_band_energy_stream
262                        .push(quantized_band_energy_residual);
263                    *candidate_band_energy_freqs
264                        .entry(quantized_band_energy_residual)
265                        .or_default() += 1;
266
267                    // Determine band bin quantization scale
268                    let band_bin_quantize_scale = f32::powf(
269                        (BAND_BIN_QUANTIZE_SCALE_BASES[band_index]) as f32
270                            / 200.0f32,
271                        3.0f32,
272                    ) * (scaling_factor as f32)
273                        / (max_scaling_factor as f32)
274                        * 127.0f32
275                        * linear_band_energy
276                        * linear_band_energy;
277
278                    // Normalize, quantize, and encode band bins
279                    for bin_index in 0..num_bins {
280                        let bin = band_bins[bin_index as usize];
281                        let bin_q = (f32::round(
282                            bin / (band_energy + epsilon)
283                                * band_bin_quantize_scale,
284                        )) as i8;
285                        candidate_bin_qstream.push(bin_q);
286                        *candidate_bin_qfreqs.entry(bin_q).or_default() += 1;
287                    }
288
289                    band_bins = &band_bins[num_bins as usize..];
290                }
291
292                // Model the order 0 entropy of the quantized stream symbols in
293                // order to estimate the total bits used for encoding.
294                // Also adjust estimate slightly, as squishy (and likely other
295                // compressors) tend to find additional correlations not
296                // captured by this simple model
297                let band_energy_bits_estimate =
298                    order0_bits_estimate(&candidate_band_energy_freqs);
299                let bin_qbits_estimate =
300                    order0_bits_estimate(&candidate_bin_qfreqs);
301                let estimate_adjustment: f64 = 0.83;
302                let subframe_bits_estimate = (band_energy_bits_estimate
303                    + bin_qbits_estimate)
304                    * estimate_adjustment;
305
306                // Accept these candidate streams if this bit count estimate is
307                // closest to the target for the subframe
308                let target_bits_per_subframe_with_slack_bits =
309                    target_bits_per_subframe + slack_bits;
310                if scaling_factor == min_scaling_factor
311                    || f64::abs(
312                        subframe_bits_estimate
313                            - target_bits_per_subframe_with_slack_bits,
314                    ) < f64::abs(
315                        best_subframe_bits_estimate
316                            - target_bits_per_subframe_with_slack_bits,
317                    )
318                {
319                    best_quantized_band_energies =
320                        candidate_quantized_band_energies;
321                    best_band_energy_stream = candidate_band_energy_stream;
322                    best_bin_qstream = candidate_bin_qstream;
323                    best_subframe_bits_estimate = subframe_bits_estimate;
324                }
325            }
326
327            // Update quantized band energy predictions for next subframe
328            quantized_band_energy_predictions = best_quantized_band_energies;
329
330            // Output the best-performing parameters/coefficients to their
331            // respective streams
332            band_energy_stream.extend(best_band_energy_stream);
333            bin_qstream.extend(best_bin_qstream);
334
335            // Adjust slack bits depending on our estimated bits used for
336            // this subframe
337            slack_bits +=
338                target_bits_per_subframe - best_subframe_bits_estimate;
339
340            // Update total bits estimate
341            total_bits_estimate += best_subframe_bits_estimate;
342        }
343    }
344
345    // Concatenate streams
346    v.extend(window_mode_stream);
347    v.extend(bin_qstream.into_iter().map(|x| x as u8));
348    v.extend(band_energy_stream);
349
350    EncodeResult {
351        stream: v,
352        total_bits_estimate,
353    }
354}