1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
//! Standalone SILK frame decoding types and scaffolding.
//!
//! SILK (Skype Low Latency Audio Codec) is the speech codec used within Opus.
//! This module provides lightweight frame-level types for parsing SILK frame
//! headers and applying LPC synthesis, independent of the full Opus decoder
//! pipeline.
/// Audio bandwidth modes a SILK stream can operate in.
///
/// Each variant maps to one fixed sample rate; see
/// [`SilkBandwidth::sample_rate`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SilkBandwidth {
    /// Narrowband: sampled at 8 kHz.
    NarrowBand,
    /// Medium band: sampled at 12 kHz.
    MediumBand,
    /// Wideband: sampled at 16 kHz.
    WideBand,
    /// Super-wideband: sampled at 24 kHz.
    SuperWideBand,
}

impl SilkBandwidth {
    /// Sample rate, in Hz, that this bandwidth mode runs at.
    pub fn sample_rate(&self) -> u32 {
        // The table is kept in kHz and scaled once at the end.
        let kilohertz: u32 = match *self {
            SilkBandwidth::NarrowBand => 8,
            SilkBandwidth::MediumBand => 12,
            SilkBandwidth::WideBand => 16,
            SilkBandwidth::SuperWideBand => 24,
        };
        kilohertz * 1_000
    }
}
/// Header fields extracted from the start of a SILK frame.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SilkFrameHeader {
    /// Voice activity detection (VAD) flag.
    pub vad_flag: bool,
    /// Set when a Low Bitrate Redundancy (LBRR) payload is present.
    pub lbrr_flag: bool,
    /// Signal type: 0 = inactive, 1 = voiced, 2 = unvoiced.
    pub signal_type: u8,
    /// Quantization offset type (0 or 1).
    pub quantization_offset: u8,
}

impl SilkFrameHeader {
    /// Builds a `SilkFrameHeader` from the leading byte of a raw frame.
    ///
    /// Only byte 0 of `data` is inspected; any further bytes are ignored.
    /// This is a scaffold parser: it extracts just enough to set up frame
    /// processing, while complete SILK decoding needs an entropy/range
    /// decoder.
    ///
    /// Bit layout of byte 0 (bit 7 is the most significant):
    /// ```text
    /// Bit 7:    VAD flag
    /// Bit 6:    LBRR flag
    /// Bits 5-4: signal_type (0-2; the value 0b11 is folded to 0, inactive)
    /// Bit 3:    quantization_offset
    /// Bits 2-0: reserved / additional payload bits (not read here)
    /// ```
    ///
    /// # Errors
    ///
    /// Returns an error string when `data` is empty.
    pub fn parse(data: &[u8]) -> Result<Self, String> {
        const VAD_MASK: u8 = 0b1000_0000;
        const LBRR_MASK: u8 = 0b0100_0000;
        const SIGNAL_SHIFT: u32 = 4;
        const OFFSET_SHIFT: u32 = 3;

        let lead = match data.first() {
            Some(&byte) => byte,
            None => return Err(String::from("SILK frame data is empty")),
        };

        // The two-bit field can encode 3, which has no meaning of its own
        // and is reported as "inactive".
        let signal_type = match (lead >> SIGNAL_SHIFT) & 0b11 {
            0b11 => 0,
            code => code,
        };

        Ok(Self {
            vad_flag: (lead & VAD_MASK) != 0,
            lbrr_flag: (lead & LBRR_MASK) != 0,
            signal_type,
            quantization_offset: (lead >> OFFSET_SHIFT) & 0b1,
        })
    }
}
/// Coefficients of the LPC (Linear Predictive Coding) filter for one SILK
/// subframe.
///
/// Values are Q12 fixed-point, so `4096` stands for 1.0.
#[derive(Debug, Clone)]
pub struct SilkLpcCoeffs {
    /// Filter order (10 for narrowband/mediumband, 16 for wideband/super-wideband).
    pub order: usize,
    /// Filter coefficients, Q12 fixed-point.
    pub coeffs: Vec<i16>,
}

impl SilkLpcCoeffs {
    /// Builds an all-zero coefficient set holding `order` entries.
    pub fn new(order: usize) -> Self {
        let mut coeffs: Vec<i16> = Vec::with_capacity(order);
        coeffs.resize(order, 0);
        Self { order, coeffs }
    }
}
/// A decoded SILK frame.
#[derive(Debug, Clone)]
pub struct SilkFrame {
    /// Parsed frame header.
    pub header: SilkFrameHeader,
    /// LPC coefficients used for synthesis.
    pub lpc: SilkLpcCoeffs,
    /// Decoded PCM samples (i16, linear).
    pub samples: Vec<i16>,
    /// Number of samples in this frame.
    ///
    /// This is stored separately from `samples`; code that replaces
    /// `samples` directly is responsible for keeping the two in sync.
    pub sample_count: usize,
}

impl SilkFrame {
    /// Creates an empty `SilkFrame`: cleared header flags, a zeroed
    /// order-16 LPC coefficient set, no samples and a sample count of 0.
    pub fn new() -> Self {
        Self {
            header: SilkFrameHeader {
                vad_flag: false,
                lbrr_flag: false,
                signal_type: 0,
                quantization_offset: 0,
            },
            lpc: SilkLpcCoeffs::new(16),
            samples: Vec::new(),
            sample_count: 0,
        }
    }

    /// Returns the number of PCM samples in this frame.
    ///
    /// The value is read from the `sample_count` field; it is not recomputed
    /// from `samples.len()`.
    pub fn sample_count(&self) -> usize {
        self.sample_count
    }

    /// Returns the samples normalised to the range `[-1.0, 1.0]` as `f32`.
    ///
    /// Samples are scaled by `1 / i16::MAX`, so `i16::MAX` maps to exactly
    /// `1.0`. `i16::MIN` has a magnitude one larger than `i16::MAX` and would
    /// scale to roughly `-1.00003`; it is clamped to `-1.0` so that the
    /// documented range always holds.
    pub fn as_f32_samples(&self) -> Vec<f32> {
        let full_scale = f32::from(i16::MAX);
        self.samples
            .iter()
            .map(|&s| (f32::from(s) / full_scale).max(-1.0))
            .collect()
    }
}

impl Default for SilkFrame {
    /// Equivalent to [`SilkFrame::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// SILK frame decoder scaffold.
///
/// This type parses the SILK frame header and exposes helpers for LPC
/// synthesis. Full entropy-coded SILK decoding is extremely complex and is
/// provided by the Opus implementation in `crate::opus::silk`. This struct
/// is intentionally lightweight and suitable for testing and scaffolding.
#[derive(Debug)]
pub struct SilkDecoder {
    /// Operating bandwidth.
    pub bandwidth: SilkBandwidth,
    /// Expected frame size in samples.
    pub frame_size: usize,
    /// Most recent output samples, oldest first and newest last, kept as the
    /// LPC synthesis filter state between calls.
    pub prev_samples: Vec<i16>,
}

impl SilkDecoder {
    /// Creates a new `SilkDecoder` for the given bandwidth.
    ///
    /// The frame size is the number of samples in 20 ms at the bandwidth's
    /// sample rate, and the synthesis history starts out as silence with one
    /// slot per LPC tap (10 for NB/MB, 16 for WB/SWB).
    pub fn new(bandwidth: SilkBandwidth) -> Self {
        // SILK uses 20 ms frames.
        let frame_size = (bandwidth.sample_rate() as usize) * 20 / 1000;
        // LPC order is 16 for WB/SWB, 10 for NB/MB.
        let lpc_order = match bandwidth {
            SilkBandwidth::NarrowBand | SilkBandwidth::MediumBand => 10,
            SilkBandwidth::WideBand | SilkBandwidth::SuperWideBand => 16,
        };
        Self {
            bandwidth,
            frame_size,
            prev_samples: vec![0i16; lpc_order],
        }
    }

    /// Parses the frame header and returns a `SilkFrame` with zeroed samples.
    ///
    /// Full SILK decoding (excitation decoding, LTP, noise shaping …) is not
    /// implemented here; the goal of this method is to validate the header and
    /// set up the frame scaffold so that higher-level code can fill in the
    /// decoded samples.
    ///
    /// The returned frame carries `frame_size` zero samples and a zeroed LPC
    /// coefficient set whose order equals the length of `prev_samples`. The
    /// decoder state itself is not modified.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`SilkFrameHeader::parse`], which fails on
    /// empty input.
    pub fn decode_frame(&mut self, data: &[u8]) -> Result<SilkFrame, String> {
        let header = SilkFrameHeader::parse(data)?;
        let lpc = SilkLpcCoeffs::new(self.prev_samples.len());
        Ok(SilkFrame {
            header,
            lpc,
            samples: vec![0i16; self.frame_size],
            sample_count: self.frame_size,
        })
    }

    /// Applies the LPC synthesis filter to an excitation signal.
    ///
    /// The synthesis filter is:
    /// ```text
    /// s[n] = excitation[n] + sum_{k=0}^{order-1} (lpc.coeffs[k] * s[n-k-1]) >> 12
    /// ```
    /// where the shift by 12 converts each Q12 product back to the integer
    /// domain (applied per term, as an arithmetic shift). Every output sample
    /// is saturated to the `i16` range before it is fed back.
    ///
    /// `prev_samples` supplies the samples preceding `excitation[0]`
    /// (newest last); taps that reach further back than the available history
    /// see silence. On return `prev_samples` keeps its length and holds the
    /// most recent samples of the combined `history ++ output` stream, so a
    /// frame shorter than the history shifts the older samples forward
    /// instead of leaving stale values in place.
    ///
    /// The number of taps used is `lpc.order`, limited to the number of
    /// coefficients actually present in `lpc.coeffs`.
    pub fn apply_lpc_synthesis(&mut self, excitation: &[i16], lpc: &SilkLpcCoeffs) -> Vec<i16> {
        // `order` and `coeffs` are independent public fields; never index
        // past the coefficients that exist.
        let taps = &lpc.coeffs[..lpc.order.min(lpc.coeffs.len())];
        let history_len = self.prev_samples.len();

        // One contiguous stream: carried-over history followed by the output
        // produced so far. Its reversed view is s[n-1], s[n-2], ...
        let mut signal: Vec<i16> = Vec::with_capacity(history_len + excitation.len());
        signal.extend_from_slice(&self.prev_samples);

        for &drive in excitation {
            let mut acc = i64::from(drive);
            // `zip` stops at the shorter side, so taps without a matching
            // past sample contribute nothing.
            for (&coeff, &past) in taps.iter().zip(signal.iter().rev()) {
                acc += (i64::from(coeff) * i64::from(past)) >> 12;
            }
            let saturated = acc.clamp(i64::from(i16::MIN), i64::from(i16::MAX));
            // Lossless: `saturated` was just clamped to the i16 range.
            signal.push(saturated as i16);
        }

        // Carry the newest `history_len` samples of the stream into the next call.
        let carry_from = signal.len() - history_len;
        self.prev_samples.copy_from_slice(&signal[carry_from..]);

        // Everything after the original history is this call's output.
        signal.split_off(history_len)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- SilkBandwidth ---------------------------------------------------

    #[test]
    fn test_silk_bandwidth_sample_rate_narrowband() {
        let rate = SilkBandwidth::NarrowBand.sample_rate();
        assert_eq!(rate, 8_000);
    }

    #[test]
    fn test_silk_bandwidth_sample_rate_mediumband() {
        let rate = SilkBandwidth::MediumBand.sample_rate();
        assert_eq!(rate, 12_000);
    }

    #[test]
    fn test_silk_bandwidth_sample_rate_wideband() {
        let rate = SilkBandwidth::WideBand.sample_rate();
        assert_eq!(rate, 16_000);
    }

    #[test]
    fn test_silk_bandwidth_sample_rate_superwideband() {
        let rate = SilkBandwidth::SuperWideBand.sample_rate();
        assert_eq!(rate, 24_000);
    }

    // --- SilkFrameHeader -------------------------------------------------

    #[test]
    fn test_silk_frame_header_parse_basic() {
        // Header byte, MSB first:
        //   VAD = 1 | LBRR = 0 | signal = 01 (voiced) | q_offset = 0 | 000
        //   => 0b1001_0000 = 0x90
        let payload = [0x90u8];
        let header = SilkFrameHeader::parse(&payload).expect("should succeed");
        assert!(header.vad_flag);
        assert!(!header.lbrr_flag);
        assert_eq!(header.signal_type, 1);
        assert_eq!(header.quantization_offset, 0);
    }

    #[test]
    fn test_silk_frame_header_parse_empty_returns_error() {
        let no_bytes: [u8; 0] = [];
        assert!(SilkFrameHeader::parse(&no_bytes).is_err());
    }

    // --- SilkDecoder -----------------------------------------------------

    #[test]
    fn test_silk_decoder_new() {
        let decoder = SilkDecoder::new(SilkBandwidth::WideBand);
        assert_eq!(decoder.bandwidth, SilkBandwidth::WideBand);
        // 20 ms at 16 kHz is 320 samples.
        assert_eq!(decoder.frame_size, 320);
    }

    #[test]
    fn test_silk_decoder_decode_frame() {
        let mut decoder = SilkDecoder::new(SilkBandwidth::NarrowBand);
        let payload = [0x00u8; 10];
        let frame = decoder.decode_frame(&payload).expect("should succeed");
        // 20 ms at 8 kHz is 160 samples.
        assert_eq!(frame.sample_count(), 160);
        assert_eq!(frame.samples.len(), 160);
    }

    #[test]
    fn test_silk_lpc_synthesis_zero_excitation_gives_zero_output() {
        let mut decoder = SilkDecoder::new(SilkBandwidth::NarrowBand);
        let silence = vec![0i16; 160];
        let coeffs = SilkLpcCoeffs::new(10);
        let synthesized = decoder.apply_lpc_synthesis(&silence, &coeffs);
        // With no excitation and an all-zero history nothing may come out.
        assert!(!synthesized.iter().any(|&sample| sample != 0));
    }

    // --- SilkFrame -------------------------------------------------------

    #[test]
    fn test_silk_frame_as_f32_samples_i16_max() {
        let mut frame = SilkFrame::new();
        frame.samples = vec![i16::MAX];
        frame.sample_count = 1;
        let normalised = frame.as_f32_samples();
        // Positive full scale must land on (approximately) +1.0.
        let error = (normalised[0] - 1.0f32).abs();
        assert!(error < 1e-4);
    }

    #[test]
    fn test_silk_frame_as_f32_samples_i16_min() {
        let mut frame = SilkFrame::new();
        frame.samples = vec![i16::MIN];
        frame.sample_count = 1;
        let normalised = frame.as_f32_samples();
        // Negative full scale must land at the bottom of the range,
        // i.e. at (approximately) -1.0.
        assert!(normalised[0] < -0.999);
    }

    // --- SilkLpcCoeffs ---------------------------------------------------

    #[test]
    fn test_silk_lpc_coeffs_new() {
        let coeffs = SilkLpcCoeffs::new(10);
        assert_eq!(coeffs.order, 10);
        assert_eq!(coeffs.coeffs.len(), 10);
        assert!(!coeffs.coeffs.iter().any(|&c| c != 0));
    }
}