oxideav-opus 0.0.5

Opus audio codec for oxideav — SILK + CELT decode (mono/stereo), CELT-only full-band encode, SILK-only encode (NB/MB/WB, mono+stereo, 10/20/40/60 ms)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
//! Opus encoder — CELT-only full-band + the full SILK-only config
//! matrix (configs 0..=11), mono and stereo, 10 / 20 / 40 / 60 ms.
//!
//! # Mode selection
//!
//! * [`OpusEncoder::new_celt_only_full_band`] — CELT-only fullband
//!   20 ms (config 31). Accepts 48 kHz mono/stereo input. Stereo is
//!   downmixed to mono before the CELT mono core; see the original
//!   CELT section below for the honest caveat about the stereo TOC bit.
//!
//! * [`SilkEncoder`] exposes one named constructor per (bandwidth,
//!   channels, duration) tuple:
//!   - bandwidth ∈ {NB, MB, WB} (8 / 12 / 16 kHz internal rates),
//!   - channels ∈ {mono, stereo},
//!   - duration ∈ {10, 20, 40, 60} ms.
//!
//!   That gives 24 constructors, covering all 12 SILK-only `config`
//!   values × stereo bit. Each accepts either the SILK internal rate
//!   or 48 kHz; the latter is downsampled by a box-average pre-filter.
//!   40 / 60 ms packets carry 2 / 3 back-to-back 20 ms SILK frame
//!   bodies per RFC 6716 §4.2.4, so they produce a single Opus packet
//!   with framing code 0 (not code-1/2/3 — the SILK frames share one
//!   TOC byte and one range-coder bitstream).
//!
//! [`OpusEncoder::new`] routes by the `CodecParameters::sample_rate`:
//! 48 kHz mono/stereo → CELT-only FB; anything else → `Unsupported`,
//! because switching to SILK invisibly would be a nasty foot-gun for
//! callers that expected 48 kHz output parity. To emit SILK packets,
//! construct a [`SilkEncoder`] explicitly.
//!
//! Hybrid (SILK+CELT) is not implemented on either path.
//!
//! # Packet layout (RFC 6716 §3)
//!
//! ```text
//!   [ TOC byte ] [ CELT bitstream bytes ... ]
//! ```
//!
//! where the TOC byte is `(config << 3) | (stereo << 2) | code` with
//! `config = 31`, `stereo ∈ {0, 1}`, `code = 0` (single frame).
//!
//! # Supported inputs
//!
//! * S16 / S16P / F32 / F32P sample formats.
//! * 48 kHz sample rate only.
//! * Mono (channels = 1) — native path.
//! * Stereo (channels = 2) — **downmixed to mono** before being fed to
//!   the mono-only CELT encoder; the TOC is emitted with `stereo = 0`.
//!   A real CELT stereo path (coupled L/R PVQ with intensity /
//!   dual-stereo) would be needed to honestly advertise `stereo = 1`
//!   in the TOC, and the `oxideav-celt` encoder is mono-only today —
//!   see its module docs. The signal survives and decodes cleanly as
//!   duplicated-mono on both channels; per-channel detail is lost.
//!
//! # Unsupported
//!
//! * Framing codes 1/2/3 (multi-frame packets) — not emitted. 40 / 60
//!   ms SILK packets *are* emitted via RFC §4.2.4's multiple-SILK-
//!   frames-per-Opus-frame mechanism (still code = 0).
//! * CELT 2.5 / 5 / 10 ms frame sizes.
//! * Hybrid (SILK+CELT) mode.
//! * More than 2 channels.

use std::collections::VecDeque;

use oxideav_celt::encoder::{CeltEncoder, FRAME_SAMPLES, SAMPLE_RATE};
use oxideav_core::Encoder;
use oxideav_core::{
    AudioFrame, CodecId, CodecParameters, Error, Frame, Packet, Result, SampleFormat, TimeBase,
};

/// `config` field value for CELT-only, fullband, 20 ms frames.
const OPUS_CONFIG_CELT_FB_20MS: u8 = 31;

/// Build a TOC byte for config 31 (CELT-only FB 20 ms), code-0 (single
/// frame packet), with the given stereo bit.
///
/// Layout (RFC 6716 §3.1): `config(5) | stereo(1) | code(2)`.
pub fn build_toc_byte(stereo: bool) -> u8 {
    let stereo_bit: u8 = if stereo { 1 } else { 0 };
    (OPUS_CONFIG_CELT_FB_20MS << 3) | (stereo_bit << 2) // code = 0 (single frame)
}

/// Number of PCM samples per 20 ms Opus/CELT frame at 48 kHz.
pub const OPUS_FRAME_SAMPLES: usize = 960;

pub struct OpusEncoder {
    /// Output-stream parameters (after any channel-count adjustments).
    out_params: CodecParameters,
    /// Channel count on the *input* frames (1 or 2). Stereo inputs are
    /// downmixed to mono before hitting the CELT encoder.
    input_channels: u16,
    /// The underlying mono CELT encoder.
    celt: CeltEncoder,
    /// Output packet queue (one Opus packet per 20 ms of input).
    output: VecDeque<Packet>,
    /// PTS counter (in 48 kHz samples).
    pts_counter: i64,
}

impl OpusEncoder {
    /// Build a new Opus encoder. Mode selection is purely driven by the
    /// sample rate in `params`: 48 kHz → CELT-only full-band 20 ms. Any
    /// other rate returns `Error::Unsupported`.
    ///
    /// For an explicit, mode-named entry point that keeps the call-site
    /// intent obvious, see [`OpusEncoder::new_celt_only_full_band`].
    pub fn new(params: &CodecParameters) -> Result<Self> {
        let channels = params.channels.unwrap_or(1);
        if channels == 0 || channels > 2 {
            return Err(Error::unsupported(format!(
                "opus encoder: only mono/stereo supported, got {channels}-channel input"
            )));
        }
        let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
        if sr != SAMPLE_RATE {
            return Err(Error::unsupported(format!(
                "opus encoder: input must be 48 kHz (got {sr}); resample before encoding"
            )));
        }

        // Drive the underlying CELT encoder as mono — stereo input is
        // downmixed on the way in. The CELT-mono path is the only one
        // implemented today.
        let mut celt_params = params.clone();
        celt_params.channels = Some(1);
        celt_params.sample_rate = Some(SAMPLE_RATE);
        // CeltEncoder expects its own codec id; clone the whole parameter
        // block and override the id so the inner encoder doesn't reject
        // us for a mismatch.
        celt_params.codec_id = CodecId::new(oxideav_celt::CODEC_ID_STR);
        let celt = CeltEncoder::new(&celt_params)?;

        // Output params: we report the *input* channel count so that the
        // downstream muxer keeps the packet's implied channel layout in
        // sync with what callers asked for. The bitstream body is always
        // a mono CELT frame though — see module docs.
        let mut out_params = params.clone();
        out_params.sample_rate = Some(SAMPLE_RATE);
        out_params.channels = Some(channels);

        Ok(Self {
            out_params,
            input_channels: channels,
            celt,
            output: VecDeque::new(),
            pts_counter: 0,
        })
    }

    /// Explicit CELT-only full-band (48 kHz, 20 ms) constructor. Equivalent
    /// to [`OpusEncoder::new`] with `params.sample_rate = Some(48_000)`,
    /// but documents the intent at the call site. Returns `Unsupported`
    /// if the caller passed a non-48 kHz rate.
    ///
    /// Channels must be 1 or 2. Stereo input is downmixed to mono — see
    /// the module docs for why.
    pub fn new_celt_only_full_band(params: &CodecParameters) -> Result<Self> {
        let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
        if sr != SAMPLE_RATE {
            return Err(Error::unsupported(format!(
                "opus encoder (CELT-only FB): input must be 48 kHz, got {sr}"
            )));
        }
        Self::new(params)
    }

    /// Pull all pending CELT packets out of the underlying encoder, wrap
    /// each in an Opus TOC byte, and push the resulting Opus packets to
    /// the output queue.
    fn drain_celt(&mut self) -> Result<()> {
        // CeltEncoder is mono-only so stereo_bit is always 0 here.
        let toc = build_toc_byte(false);
        loop {
            match self.celt.receive_packet() {
                Ok(celt_pkt) => {
                    let mut data = Vec::with_capacity(1 + celt_pkt.data.len());
                    data.push(toc);
                    data.extend_from_slice(&celt_pkt.data);
                    let tb = TimeBase::new(1, SAMPLE_RATE as i64);
                    let pts = self.pts_counter;
                    self.pts_counter += OPUS_FRAME_SAMPLES as i64;
                    let pkt = Packet::new(0, tb, data)
                        .with_pts(pts)
                        .with_duration(OPUS_FRAME_SAMPLES as i64);
                    self.output.push_back(pkt);
                }
                Err(Error::NeedMore) => return Ok(()),
                Err(e) => return Err(e),
            }
        }
    }
}

impl Encoder for OpusEncoder {
    fn codec_id(&self) -> &CodecId {
        &self.out_params.codec_id
    }

    fn output_params(&self) -> &CodecParameters {
        &self.out_params
    }

    fn send_frame(&mut self, frame: &Frame) -> Result<()> {
        let audio = match frame {
            Frame::Audio(a) => a,
            _ => {
                return Err(Error::invalid(
                    "opus encoder: expected audio frame, got video",
                ))
            }
        };
        if audio.sample_rate != SAMPLE_RATE {
            return Err(Error::unsupported(format!(
                "opus encoder: input must be 48 kHz (got {}); resample before encoding",
                audio.sample_rate
            )));
        }
        if audio.channels != self.input_channels {
            return Err(Error::invalid(format!(
                "opus encoder: frame channels ({}) differ from configured input channels ({})",
                audio.channels, self.input_channels
            )));
        }

        // Flatten the input into a mono f32 buffer regardless of whether
        // the container was mono (passthrough) or stereo (downmix).
        let mono = extract_mono_f32(audio)?;

        // Feed the CELT encoder as a single mono F32 frame.
        let mut bytes = Vec::with_capacity(mono.len() * 4);
        for &s in &mono {
            bytes.extend_from_slice(&s.to_le_bytes());
        }
        let celt_frame = Frame::Audio(AudioFrame {
            format: SampleFormat::F32,
            channels: 1,
            sample_rate: SAMPLE_RATE,
            samples: mono.len() as u32,
            pts: audio.pts,
            time_base: TimeBase::new(1, SAMPLE_RATE as i64),
            data: vec![bytes],
        });
        self.celt.send_frame(&celt_frame)?;
        self.drain_celt()
    }

    fn receive_packet(&mut self) -> Result<Packet> {
        if let Some(p) = self.output.pop_front() {
            Ok(p)
        } else {
            Err(Error::NeedMore)
        }
    }

    fn flush(&mut self) -> Result<()> {
        self.celt.flush()?;
        self.drain_celt()?;
        Ok(())
    }
}

/// Decode the `AudioFrame`'s sample bytes into a mono f32 buffer, applying
/// a stereo → mono downmix (simple mean) when needed. Supports S16 and
/// F32 (interleaved or planar).
fn extract_mono_f32(audio: &AudioFrame) -> Result<Vec<f32>> {
    let n = audio.samples as usize;
    let ch = audio.channels as usize;
    if ch == 0 {
        return Err(Error::invalid("opus encoder: 0-channel audio frame"));
    }
    let mut out = vec![0f32; n];
    match audio.format {
        SampleFormat::S16 => {
            // Interleaved S16.
            let bytes = &audio.data[0];
            let needed = n * ch * 2;
            if bytes.len() < needed {
                return Err(Error::invalid(
                    "opus encoder: S16 input shorter than declared sample count",
                ));
            }
            for i in 0..n {
                let mut acc = 0i32;
                for c in 0..ch {
                    let off = (i * ch + c) * 2;
                    let s = i16::from_le_bytes([bytes[off], bytes[off + 1]]);
                    acc += s as i32;
                }
                out[i] = (acc as f32) / (ch as f32 * 32768.0);
            }
        }
        SampleFormat::S16P => {
            // One plane per channel. Mono = plane 0, stereo = two planes.
            if audio.data.len() < ch {
                return Err(Error::invalid("opus encoder: S16P input missing planes"));
            }
            for i in 0..n {
                let mut acc = 0i32;
                for c in 0..ch {
                    let plane = &audio.data[c];
                    if plane.len() < n * 2 {
                        return Err(Error::invalid(
                            "opus encoder: S16P plane shorter than declared sample count",
                        ));
                    }
                    let off = i * 2;
                    let s = i16::from_le_bytes([plane[off], plane[off + 1]]);
                    acc += s as i32;
                }
                out[i] = (acc as f32) / (ch as f32 * 32768.0);
            }
        }
        SampleFormat::F32 => {
            let bytes = &audio.data[0];
            let needed = n * ch * 4;
            if bytes.len() < needed {
                return Err(Error::invalid(
                    "opus encoder: F32 input shorter than declared sample count",
                ));
            }
            for i in 0..n {
                let mut acc = 0f32;
                for c in 0..ch {
                    let off = (i * ch + c) * 4;
                    acc += f32::from_le_bytes([
                        bytes[off],
                        bytes[off + 1],
                        bytes[off + 2],
                        bytes[off + 3],
                    ]);
                }
                out[i] = acc / ch as f32;
            }
        }
        SampleFormat::F32P => {
            if audio.data.len() < ch {
                return Err(Error::invalid("opus encoder: F32P input missing planes"));
            }
            for i in 0..n {
                let mut acc = 0f32;
                for c in 0..ch {
                    let plane = &audio.data[c];
                    if plane.len() < n * 4 {
                        return Err(Error::invalid(
                            "opus encoder: F32P plane shorter than declared sample count",
                        ));
                    }
                    let off = i * 4;
                    acc += f32::from_le_bytes([
                        plane[off],
                        plane[off + 1],
                        plane[off + 2],
                        plane[off + 3],
                    ]);
                }
                out[i] = acc / ch as f32;
            }
        }
        other => {
            return Err(Error::unsupported(format!(
                "opus encoder: sample format {:?} not supported (use S16 / S16P / F32 / F32P)",
                other
            )));
        }
    }
    // Sanity: the CELT encoder always consumes `FRAME_SAMPLES` (960) per
    // frame. We don't enforce `n == FRAME_SAMPLES` here because the
    // underlying CELT encoder buffers up to a frame boundary internally
    // — but we do surface any non-20-ms chunking downstream as Unsupported
    // there. The caller is free to send any number of samples per frame
    // as long as the aggregate ends on a frame boundary before `flush()`.
    let _ = FRAME_SAMPLES;
    Ok(out)
}

pub fn make_encoder(params: &CodecParameters) -> Result<Box<dyn Encoder>> {
    Ok(Box::new(OpusEncoder::new(params)?))
}

// ---------------------------------------------------------------------
// SILK encoder — NB / MB / WB mono + NB stereo, 20 ms.
// ---------------------------------------------------------------------

/// `config` field value for SILK-only, narrowband, 20 ms frames (§3.1
/// Table 2).
pub const OPUS_CONFIG_SILK_NB_20MS: u8 = 1;
/// `config` field value for SILK-only, mediumband, 20 ms frames.
pub const OPUS_CONFIG_SILK_MB_20MS: u8 = 5;
/// `config` field value for SILK-only, wideband, 20 ms frames.
pub const OPUS_CONFIG_SILK_WB_20MS: u8 = 9;

/// Number of PCM samples per 20 ms SILK NB frame at the internal 8 kHz
/// rate.
pub const SILK_NB_FRAME_SAMPLES_INTERNAL: usize = 160;
/// Samples per 20 ms SILK MB frame at the internal 12 kHz rate.
pub const SILK_MB_FRAME_SAMPLES_INTERNAL: usize = 240;
/// Samples per 20 ms SILK WB frame at the internal 16 kHz rate.
pub const SILK_WB_FRAME_SAMPLES_INTERNAL: usize = 320;

/// Number of PCM samples per 20 ms frame at the Opus output rate of
/// 48 kHz (for PTS accounting).
pub const SILK_FRAME_SAMPLES_48K: usize = 960;

/// Internal (SILK) rate for NB.
pub const SILK_NB_RATE: u32 = 8_000;
/// Internal (SILK) rate for MB.
pub const SILK_MB_RATE: u32 = 12_000;
/// Internal (SILK) rate for WB.
pub const SILK_WB_RATE: u32 = 16_000;

/// Build a TOC byte for a SILK-only narrowband 20 ms (config 1) packet.
///
/// Layout (RFC 6716 §3.1): `config(5) | stereo(1) | code(2)`.
pub fn build_silk_nb_20ms_toc(stereo: bool) -> u8 {
    let stereo_bit: u8 = if stereo { 1 } else { 0 };
    (OPUS_CONFIG_SILK_NB_20MS << 3) | (stereo_bit << 2) // code = 0
}

/// Build a TOC byte for a SILK-only mediumband 20 ms (config 5) packet.
pub fn build_silk_mb_20ms_toc(stereo: bool) -> u8 {
    let stereo_bit: u8 = if stereo { 1 } else { 0 };
    (OPUS_CONFIG_SILK_MB_20MS << 3) | (stereo_bit << 2)
}

/// Build a TOC byte for a SILK-only wideband 20 ms (config 9) packet.
pub fn build_silk_wb_20ms_toc(stereo: bool) -> u8 {
    let stereo_bit: u8 = if stereo { 1 } else { 0 };
    (OPUS_CONFIG_SILK_WB_20MS << 3) | (stereo_bit << 2)
}

/// Opus audio bandwidth for a SILK-only mode.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum SilkBw {
    Nb,
    Mb,
    Wb,
}

impl SilkBw {
    fn internal_rate(self) -> u32 {
        match self {
            SilkBw::Nb => SILK_NB_RATE,
            SilkBw::Mb => SILK_MB_RATE,
            SilkBw::Wb => SILK_WB_RATE,
        }
    }
    /// Samples per 20 ms SILK frame at the internal rate.
    fn frame_samples_20ms(self) -> usize {
        match self {
            SilkBw::Nb => SILK_NB_FRAME_SAMPLES_INTERNAL,
            SilkBw::Mb => SILK_MB_FRAME_SAMPLES_INTERNAL,
            SilkBw::Wb => SILK_WB_FRAME_SAMPLES_INTERNAL,
        }
    }
}

/// Concrete SILK mode this `SilkEncoder` instance emits.
///
/// A SILK mode is the tuple of (bandwidth, channels, duration). Together
/// they fix the TOC `config` field (0..=11) plus the stereo bit. The
/// decoder's frame-size mapping is in RFC 6716 Table 2.
#[derive(Copy, Clone, Debug)]
struct SilkMode {
    bw: SilkBw,
    stereo: bool,
    /// Opus frame duration in milliseconds: 10, 20, 40, or 60.
    duration_ms: u32,
}

impl SilkMode {
    fn new(bw: SilkBw, stereo: bool, duration_ms: u32) -> Self {
        debug_assert!(matches!(duration_ms, 10 | 20 | 40 | 60));
        Self {
            bw,
            stereo,
            duration_ms,
        }
    }
    /// RFC 6716 Table 2 `config` field for this mode.
    fn config(self) -> u8 {
        // Each bandwidth occupies a block of 4 configs: NB=0..=3,
        // MB=4..=7, WB=8..=11. Within each block the order is
        // 10/20/40/60 ms.
        let base = match self.bw {
            SilkBw::Nb => 0u8,
            SilkBw::Mb => 4,
            SilkBw::Wb => 8,
        };
        let offset = match self.duration_ms {
            10 => 0,
            20 => 1,
            40 => 2,
            60 => 3,
            _ => unreachable!(),
        };
        base + offset
    }
    fn toc_byte(self) -> u8 {
        let stereo_bit: u8 = if self.stereo { 1 } else { 0 };
        (self.config() << 3) | (stereo_bit << 2)
    }
    fn internal_rate(self) -> u32 {
        self.bw.internal_rate()
    }
    /// Number of 20 ms SILK frames packed into this Opus frame per
    /// RFC §4.2.4. 10 ms = 1 (half-length), 20 ms = 1, 40 ms = 2,
    /// 60 ms = 3.
    fn silk_frames_per_packet(self) -> usize {
        match self.duration_ms {
            10 | 20 => 1,
            40 => 2,
            60 => 3,
            _ => unreachable!(),
        }
    }
    /// Number of sub-frames in each embedded SILK frame (2 for 10 ms,
    /// 4 for 20/40/60 ms).
    fn subframes_per_silk_frame(self) -> usize {
        if self.duration_ms == 10 {
            2
        } else {
            4
        }
    }
    /// Total internal-rate samples carried by this Opus frame.
    fn frame_samples_internal(self) -> usize {
        let per_silk = match self.duration_ms {
            10 => self.bw.frame_samples_20ms() / 2,
            _ => self.bw.frame_samples_20ms(),
        };
        per_silk * self.silk_frames_per_packet()
    }
    /// Samples per *embedded* SILK frame at the internal rate (one of
    /// the 1/2/3 blocks that make up a 10/20/40/60 ms Opus frame).
    fn samples_per_silk_frame(self) -> usize {
        self.frame_samples_internal() / self.silk_frames_per_packet()
    }
    /// PCM samples per Opus frame at the 48 kHz output rate (for PTS
    /// accounting).
    fn frame_samples_48k(self) -> usize {
        match self.duration_ms {
            10 => 480,
            20 => 960,
            40 => 1920,
            60 => 2880,
            _ => unreachable!(),
        }
    }
    fn input_channels(self) -> u16 {
        if self.stereo {
            2
        } else {
            1
        }
    }
    fn is_stereo(self) -> bool {
        self.stereo
    }
    /// Bytes of range-encoder storage to allocate per packet. Sized so
    /// the MVP per-sample nibble carrier fits with headroom; stereo
    /// doubles the mono budget. 40 / 60 ms packets carry 2 / 3 back-to-
    /// back SILK frame bodies, so the budget scales linearly.
    fn buffer_bytes(self) -> u32 {
        let samples = self.frame_samples_internal();
        // ~17 bits per sample worst-case (nibble+nibble + sign), plus
        // headers. Round up to a 64-byte multiple with 2× headroom.
        let base = (samples * 17) / 8 + 128;
        let doubled = if self.is_stereo() { base * 2 } else { base };
        doubled.next_multiple_of(64).max(384) as u32
    }
    /// 48 kHz → internal-rate downsample ratio (integer).
    fn downsample_ratio(self) -> usize {
        (SAMPLE_RATE / self.internal_rate()) as usize
    }
}

/// SILK-mode Opus encoder — covers the full SILK-only config matrix
/// (configs 0..=11), mono and stereo, 10 / 20 / 40 / 60 ms frames.
///
/// Emits a TOC byte matching the configured mode followed by the SILK
/// bitstream described in [`crate::silk::encoder`]. Accepts either the
/// SILK internal rate (8 kHz / 12 kHz / 16 kHz for NB / MB / WB) or the
/// 48 kHz Opus output rate; non-internal input is downsampled by a
/// simple box-average pre-filter.
///
/// Named entry points (one per (bandwidth, channels, duration) tuple):
///
/// * 20 ms mono: [`SilkEncoder::new_nb_mono_20ms`], `new_mb_mono_20ms`,
///   `new_wb_mono_20ms` (configs 1, 5, 9).
/// * 20 ms stereo: [`SilkEncoder::new_nb_stereo_20ms`],
///   `new_mb_stereo_20ms`, `new_wb_stereo_20ms` (configs 1/5/9 + stereo
///   bit). Runs a mid/side pair of [`SilkFrameEncoder`]s and emits the
///   RFC §4.2.7.1 prediction header.
/// * 10 ms mono + stereo: configs 0, 4, 8 (half the sub-frame count
///   per embedded SILK frame).
/// * 40 ms mono + stereo: configs 2, 6, 10 — packet carries 2 back-to-
///   back 20 ms SILK frame bodies per RFC §4.2.4.
/// * 60 ms mono + stereo: configs 3, 7, 11 — 3 back-to-back 20 ms SILK
///   frame bodies.
///
/// Round-trip SNR > 20 dB on speech-like input through the crate's own
/// SILK decoder for every 20 ms mode (see `encoder_roundtrip.rs`).
/// 10 ms frames lose a bit of first-frame SNR because the LPC history
/// starts cold; 40/60 ms frames match the 20 ms bar because each
/// embedded SILK frame carries its own LPC history.
///
/// Out of scope for this pass: voiced/LTP path, LBRR (redundancy),
/// Hybrid.
pub struct SilkEncoder {
    out_params: CodecParameters,
    mode: SilkMode,
    /// Per-frame SILK encoder for the mid (or mono) channel.
    silk_mid: crate::silk::encoder::SilkFrameEncoder,
    /// Side-channel encoder (stereo only).
    silk_side: Option<crate::silk::encoder::SilkFrameEncoder>,
    /// Pending internal-rate samples. For stereo, interleaved L/R.
    pending_internal: VecDeque<f32>,
    /// Expected input sample rate (internal or 48 kHz).
    input_sample_rate: u32,
    /// Output packet queue.
    output: VecDeque<Packet>,
    /// PTS counter in 48 kHz samples.
    pts_counter: i64,
}

impl SilkEncoder {
    // ---- 20 ms mono + stereo, all 3 bandwidths ---------------------

    /// Build a SILK NB mono 20 ms encoder. Input: 8 kHz or 48 kHz mono.
    pub fn new_nb_mono_20ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Nb, false, 20))
    }

    /// Build a SILK MB mono 20 ms encoder. Input: 12 kHz or 48 kHz mono.
    pub fn new_mb_mono_20ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Mb, false, 20))
    }

    /// Build a SILK WB mono 20 ms encoder. Input: 16 kHz or 48 kHz mono.
    pub fn new_wb_mono_20ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Wb, false, 20))
    }

    /// Build a SILK NB stereo 20 ms encoder. Input: 8 kHz or 48 kHz
    /// stereo (interleaved L/R). Emits a mid/side-coded packet with the
    /// stereo prediction header from RFC §4.2.7.1.
    pub fn new_nb_stereo_20ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Nb, true, 20))
    }

    /// MB stereo 20 ms (config 5 + stereo bit).
    pub fn new_mb_stereo_20ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Mb, true, 20))
    }

    /// WB stereo 20 ms (config 9 + stereo bit).
    pub fn new_wb_stereo_20ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Wb, true, 20))
    }

    // ---- 10 ms mono + stereo, all 3 bandwidths ---------------------

    /// NB mono 10 ms (config 0). Input: 8 kHz or 48 kHz mono.
    pub fn new_nb_mono_10ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Nb, false, 10))
    }

    /// MB mono 10 ms (config 4). Input: 12 kHz or 48 kHz mono.
    pub fn new_mb_mono_10ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Mb, false, 10))
    }

    /// WB mono 10 ms (config 8). Input: 16 kHz or 48 kHz mono.
    pub fn new_wb_mono_10ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Wb, false, 10))
    }

    /// NB stereo 10 ms (config 0 + stereo bit).
    pub fn new_nb_stereo_10ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Nb, true, 10))
    }

    /// MB stereo 10 ms (config 4 + stereo bit).
    pub fn new_mb_stereo_10ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Mb, true, 10))
    }

    /// WB stereo 10 ms (config 8 + stereo bit).
    pub fn new_wb_stereo_10ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Wb, true, 10))
    }

    // ---- 40 ms mono + stereo, all 3 bandwidths ---------------------
    //
    // 40 ms Opus frames contain 2 back-to-back 20 ms SILK frames per
    // RFC §4.2.4. The header VAD/LBRR flags span both SILK frames.

    /// NB mono 40 ms (config 2).
    pub fn new_nb_mono_40ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Nb, false, 40))
    }

    /// MB mono 40 ms (config 6).
    pub fn new_mb_mono_40ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Mb, false, 40))
    }

    /// WB mono 40 ms (config 10).
    pub fn new_wb_mono_40ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Wb, false, 40))
    }

    /// NB stereo 40 ms.
    pub fn new_nb_stereo_40ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Nb, true, 40))
    }

    /// MB stereo 40 ms.
    pub fn new_mb_stereo_40ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Mb, true, 40))
    }

    /// WB stereo 40 ms.
    pub fn new_wb_stereo_40ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Wb, true, 40))
    }

    // ---- 60 ms mono + stereo, all 3 bandwidths ---------------------

    /// NB mono 60 ms (config 3).
    pub fn new_nb_mono_60ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Nb, false, 60))
    }

    /// MB mono 60 ms (config 7).
    pub fn new_mb_mono_60ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Mb, false, 60))
    }

    /// WB mono 60 ms (config 11).
    pub fn new_wb_mono_60ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Wb, false, 60))
    }

    /// NB stereo 60 ms.
    pub fn new_nb_stereo_60ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Nb, true, 60))
    }

    /// MB stereo 60 ms.
    pub fn new_mb_stereo_60ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Mb, true, 60))
    }

    /// WB stereo 60 ms.
    pub fn new_wb_stereo_60ms(params: &CodecParameters) -> Result<Self> {
        Self::new_mode(params, SilkMode::new(SilkBw::Wb, true, 60))
    }

    fn build_frame_encoder(mode: SilkMode) -> crate::silk::encoder::SilkFrameEncoder {
        let bw_params = match mode.bw {
            SilkBw::Nb => crate::silk::encoder::BandwidthParams::nb(),
            SilkBw::Mb => crate::silk::encoder::BandwidthParams::mb(),
            SilkBw::Wb => crate::silk::encoder::BandwidthParams::wb(),
        };
        let subframes = mode.subframes_per_silk_frame();
        crate::silk::encoder::SilkFrameEncoder::new_with_subframes(bw_params, subframes)
    }

    fn new_mode(params: &CodecParameters, mode: SilkMode) -> Result<Self> {
        let channels = params.channels.unwrap_or(mode.input_channels());
        if channels != mode.input_channels() {
            return Err(Error::unsupported(format!(
                "SILK encoder: {:?} expects {}-channel input, got {channels} channels",
                mode,
                mode.input_channels()
            )));
        }
        let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
        let internal = mode.internal_rate();
        if sr != internal && sr != SAMPLE_RATE {
            return Err(Error::unsupported(format!(
                "SILK encoder: {mode:?} expects {internal} Hz or 48 kHz input, got {sr} Hz"
            )));
        }
        let silk_mid = Self::build_frame_encoder(mode);
        let silk_side = if mode.is_stereo() {
            Some(Self::build_frame_encoder(mode))
        } else {
            None
        };

        let mut out_params = params.clone();
        out_params.sample_rate = Some(SAMPLE_RATE);
        out_params.channels = Some(mode.input_channels());
        let per_frame_items =
            mode.frame_samples_internal() * (if mode.is_stereo() { 2 } else { 1 });

        Ok(Self {
            out_params,
            mode,
            silk_mid,
            silk_side,
            pending_internal: VecDeque::with_capacity(per_frame_items * 2),
            input_sample_rate: sr,
            output: VecDeque::new(),
            pts_counter: 0,
        })
    }

    fn drain_frames(&mut self) -> Result<()> {
        let samples_per_frame = self.mode.frame_samples_internal();
        let per_frame_items = samples_per_frame * (if self.mode.is_stereo() { 2 } else { 1 });
        while self.pending_internal.len() >= per_frame_items {
            if self.mode.is_stereo() {
                let mut left = Vec::with_capacity(samples_per_frame);
                let mut right = Vec::with_capacity(samples_per_frame);
                for _ in 0..samples_per_frame {
                    left.push(self.pending_internal.pop_front().unwrap_or(0.0));
                    right.push(self.pending_internal.pop_front().unwrap_or(0.0));
                }
                let pkt = self.encode_one_stereo_frame(&left, &right)?;
                self.output.push_back(pkt);
            } else {
                let mut frame = Vec::with_capacity(samples_per_frame);
                for _ in 0..samples_per_frame {
                    frame.push(self.pending_internal.pop_front().unwrap_or(0.0));
                }
                let pkt = self.encode_one_mono_frame(&frame)?;
                self.output.push_back(pkt);
            }
        }
        Ok(())
    }

    /// Emit the shared VAD + LBRR header at the top of an Opus frame
    /// per RFC §4.2.3 / §4.2.4. Layout (from libopus `silk_Decode`):
    ///
    /// ```text
    ///   for each internal channel n:
    ///     for each packet frame i:
    ///       vad_flags[n][i] = ec_dec_bit_logp(1)
    ///     lbrr_flag[n]      = ec_dec_bit_logp(1)
    /// ```
    ///
    /// We always emit `vad = 1` (active frame) and `lbrr = 0` (no
    /// redundancy). For stereo the mid channel's bits come first,
    /// then the side channel's. For multi-frame packets (40 / 60 ms)
    /// the VAD bits for all internal frames are emitted before the
    /// LBRR flag (one LBRR flag per channel, regardless of frame
    /// count).
    fn emit_shared_header(&self, enc: &mut oxideav_celt::range_encoder::RangeEncoder) {
        let n_silk_frames = self.mode.silk_frames_per_packet();
        let n_channels = if self.mode.is_stereo() { 2 } else { 1 };
        for _ch in 0..n_channels {
            for _i in 0..n_silk_frames {
                enc.encode_bit_logp(true, 1); // VAD = 1
            }
            enc.encode_bit_logp(false, 1); // LBRR = 0
        }
    }

    fn encode_one_mono_frame(&mut self, pcm_internal: &[f32]) -> Result<Packet> {
        debug_assert_eq!(pcm_internal.len(), self.mode.frame_samples_internal());
        let mut re = oxideav_celt::range_encoder::RangeEncoder::new(self.mode.buffer_bytes());

        self.emit_shared_header(&mut re);

        // Emit 1 / 2 / 3 back-to-back SILK frame bodies. For 10 and
        // 20 ms Opus frames this is just one body.
        let per_silk = self.mode.samples_per_silk_frame();
        let n = self.mode.silk_frames_per_packet();
        for i in 0..n {
            let start = i * per_silk;
            self.silk_mid
                .encode_frame_body(&pcm_internal[start..start + per_silk], &mut re)?;
        }

        let body = re
            .done()
            .map_err(|e| Error::other(format!("SILK encoder: {e}")))?;
        let body = strip_trailing_zeros(body);
        self.finish_packet(body)
    }

    fn encode_one_stereo_frame(&mut self, left: &[f32], right: &[f32]) -> Result<Packet> {
        debug_assert_eq!(left.len(), right.len());
        debug_assert_eq!(left.len(), self.mode.frame_samples_internal());
        let mut re = oxideav_celt::range_encoder::RangeEncoder::new(self.mode.buffer_bytes());

        self.emit_shared_header(&mut re);

        // For stereo, the mid/side split is done *per 20 ms SILK sub-
        // frame* because the decoder emits one stereo prediction header
        // + one mid body + one side body per sub-frame (RFC §4.2.4).
        let per_silk = self.mode.samples_per_silk_frame();
        let n = self.mode.silk_frames_per_packet();
        for i in 0..n {
            let start = i * per_silk;
            let lc = &left[start..start + per_silk];
            let rc = &right[start..start + per_silk];

            // Stereo prediction weights (0, 0) for the MVP — see the
            // comment in the 20 ms path.
            let (mid, side) = crate::silk::encoder::stereo_mid_side(lc, rc);
            crate::silk::encoder::encode_stereo_pred_weights(&mut re, [0, 0]);

            // Mid then side body. The decoder reads the mid-only flag
            // only when the side VAD is 0; we emit VAD=1 for the side
            // channel (see `emit_shared_header`), so no extra bit.
            self.silk_mid.encode_frame_body(&mid, &mut re)?;
            let side_enc = self
                .silk_side
                .as_mut()
                .ok_or_else(|| Error::other("SILK stereo encoder: missing side state"))?;
            side_enc.encode_frame_body(&side, &mut re)?;
        }

        let body = re
            .done()
            .map_err(|e| Error::other(format!("SILK encoder: {e}")))?;
        let body = strip_trailing_zeros(body);
        self.finish_packet(body)
    }

    fn finish_packet(&mut self, body: Vec<u8>) -> Result<Packet> {
        let toc = self.mode.toc_byte();
        let mut data = Vec::with_capacity(1 + body.len());
        data.push(toc);
        data.extend_from_slice(&body);

        let tb = TimeBase::new(1, SAMPLE_RATE as i64);
        let pts = self.pts_counter;
        let samples_48k = self.mode.frame_samples_48k() as i64;
        self.pts_counter += samples_48k;
        Ok(Packet::new(0, tb, data)
            .with_pts(pts)
            .with_duration(samples_48k))
    }
}

/// Trim trailing zero bytes from a range-encoded buffer. The last
/// non-zero byte of the main bitstream + the optional back-buffer
/// bits fully determine the decoded symbols; any trailing zeros are
/// padding the CELT range encoder writes to its allocated storage.
fn strip_trailing_zeros(mut v: Vec<u8>) -> Vec<u8> {
    while v.len() > 1 && v.last() == Some(&0) {
        v.pop();
    }
    v
}

impl Encoder for SilkEncoder {
    fn codec_id(&self) -> &CodecId {
        &self.out_params.codec_id
    }

    fn output_params(&self) -> &CodecParameters {
        &self.out_params
    }

    fn send_frame(&mut self, frame: &Frame) -> Result<()> {
        let audio = match frame {
            Frame::Audio(a) => a,
            _ => {
                return Err(Error::invalid(
                    "SILK encoder: expected audio frame, got video",
                ))
            }
        };
        if audio.channels != self.mode.input_channels() {
            return Err(Error::invalid(format!(
                "SILK encoder: frame channels ({}) differ from configured input channels ({})",
                audio.channels,
                self.mode.input_channels()
            )));
        }
        if audio.sample_rate != self.input_sample_rate {
            return Err(Error::unsupported(format!(
                "SILK encoder: input sample rate ({}) differs from configured rate ({}); reconfigure or resample first",
                audio.sample_rate, self.input_sample_rate
            )));
        }

        // Extract f32 samples. Mono modes feed `extract_mono_f32`; the
        // stereo mode keeps per-channel planes interleaved so the
        // caller-side mid/side split stays bit-exact.
        let internal_items_per_sample = if self.mode.is_stereo() { 2 } else { 1 };
        let internal_samples: Vec<f32> = if self.mode.is_stereo() {
            let stereo = extract_stereo_f32(audio)?;
            if audio.sample_rate == self.mode.internal_rate() {
                stereo
            } else {
                downsample_box_interleaved(&stereo, self.mode.downsample_ratio(), 2)
            }
        } else {
            let mono = extract_mono_f32(audio)?;
            if audio.sample_rate == self.mode.internal_rate() {
                mono
            } else {
                downsample_box(&mono, self.mode.downsample_ratio())
            }
        };

        debug_assert_eq!(
            internal_items_per_sample * (internal_samples.len() / internal_items_per_sample),
            internal_samples.len()
        );
        self.pending_internal.extend(&internal_samples);
        self.drain_frames()
    }

    fn receive_packet(&mut self) -> Result<Packet> {
        if let Some(p) = self.output.pop_front() {
            Ok(p)
        } else {
            Err(Error::NeedMore)
        }
    }

    fn flush(&mut self) -> Result<()> {
        if !self.pending_internal.is_empty() {
            let per_frame_items =
                self.mode.frame_samples_internal() * (if self.mode.is_stereo() { 2 } else { 1 });
            while self.pending_internal.len() % per_frame_items != 0 {
                self.pending_internal.push_back(0.0);
            }
            self.drain_frames()?;
        }
        Ok(())
    }
}

/// Extract an interleaved L/R f32 buffer from an `AudioFrame`. Supports
/// the same sample formats as [`extract_mono_f32`] but returns
/// `samples * 2` floats, preserving per-channel detail.
fn extract_stereo_f32(audio: &AudioFrame) -> Result<Vec<f32>> {
    let n = audio.samples as usize;
    let ch = audio.channels as usize;
    if ch != 2 {
        return Err(Error::invalid(format!(
            "SILK stereo encoder: expected 2-channel input, got {ch}"
        )));
    }
    let mut out = vec![0f32; n * 2];
    match audio.format {
        SampleFormat::S16 => {
            let bytes = &audio.data[0];
            let needed = n * 2 * 2;
            if bytes.len() < needed {
                return Err(Error::invalid(
                    "SILK stereo encoder: S16 input shorter than declared sample count",
                ));
            }
            for i in 0..n {
                for c in 0..2 {
                    let off = (i * 2 + c) * 2;
                    let s = i16::from_le_bytes([bytes[off], bytes[off + 1]]);
                    out[i * 2 + c] = s as f32 / 32768.0;
                }
            }
        }
        SampleFormat::S16P => {
            if audio.data.len() < 2 {
                return Err(Error::invalid("SILK stereo encoder: S16P missing planes"));
            }
            for i in 0..n {
                for c in 0..2 {
                    let plane = &audio.data[c];
                    if plane.len() < n * 2 {
                        return Err(Error::invalid(
                            "SILK stereo encoder: S16P plane shorter than declared sample count",
                        ));
                    }
                    let off = i * 2;
                    let s = i16::from_le_bytes([plane[off], plane[off + 1]]);
                    out[i * 2 + c] = s as f32 / 32768.0;
                }
            }
        }
        SampleFormat::F32 => {
            let bytes = &audio.data[0];
            let needed = n * 2 * 4;
            if bytes.len() < needed {
                return Err(Error::invalid(
                    "SILK stereo encoder: F32 input shorter than declared sample count",
                ));
            }
            for i in 0..n {
                for c in 0..2 {
                    let off = (i * 2 + c) * 4;
                    out[i * 2 + c] = f32::from_le_bytes([
                        bytes[off],
                        bytes[off + 1],
                        bytes[off + 2],
                        bytes[off + 3],
                    ]);
                }
            }
        }
        SampleFormat::F32P => {
            if audio.data.len() < 2 {
                return Err(Error::invalid("SILK stereo encoder: F32P missing planes"));
            }
            for i in 0..n {
                for c in 0..2 {
                    let plane = &audio.data[c];
                    if plane.len() < n * 4 {
                        return Err(Error::invalid(
                            "SILK stereo encoder: F32P plane shorter than declared sample count",
                        ));
                    }
                    let off = i * 4;
                    out[i * 2 + c] = f32::from_le_bytes([
                        plane[off],
                        plane[off + 1],
                        plane[off + 2],
                        plane[off + 3],
                    ]);
                }
            }
        }
        other => {
            return Err(Error::unsupported(format!(
                "SILK stereo encoder: sample format {other:?} not supported (use S16 / S16P / F32 / F32P)"
            )));
        }
    }
    Ok(out)
}

/// Interleaved (stride-aware) box-average downsampler. `stride` is the
/// number of interleaved channels (1 = mono, 2 = stereo). Identical
/// ratio averaging as [`downsample_box`], but keeps per-channel samples
/// aligned through the decimation.
fn downsample_box_interleaved(input: &[f32], ratio: usize, stride: usize) -> Vec<f32> {
    if ratio <= 1 {
        return input.to_vec();
    }
    debug_assert_eq!(input.len() % stride, 0);
    let n_out_frames = (input.len() / stride) / ratio;
    let mut out = vec![0f32; n_out_frames * stride];
    for i in 0..n_out_frames {
        for c in 0..stride {
            let mut sum = 0f32;
            for k in 0..ratio {
                sum += input[(i * ratio + k) * stride + c];
            }
            out[i * stride + c] = sum / ratio as f32;
        }
    }
    out
}

/// Average every `ratio` consecutive input samples into one output
/// sample. Cheap & cheerful anti-alias for speech-band content. The
/// output length is `input.len() / ratio` (any trailing partial group
/// is dropped — callers that need strict sample accounting should
/// pass multiples of `ratio`).
fn downsample_box(input: &[f32], ratio: usize) -> Vec<f32> {
    if ratio <= 1 {
        return input.to_vec();
    }
    let n_out = input.len() / ratio;
    let mut out = Vec::with_capacity(n_out);
    for i in 0..n_out {
        let mut sum = 0f32;
        for k in 0..ratio {
            sum += input[i * ratio + k];
        }
        out.push(sum / ratio as f32);
    }
    out
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn toc_byte_mono() {
        let b = build_toc_byte(false);
        assert_eq!(b >> 3, 31, "config should be 31");
        assert_eq!((b >> 2) & 1, 0, "stereo bit should be 0");
        assert_eq!(b & 0x3, 0, "framing code should be 0");
    }

    #[test]
    fn toc_byte_stereo() {
        let b = build_toc_byte(true);
        assert_eq!(b >> 3, 31, "config should be 31");
        assert_eq!((b >> 2) & 1, 1, "stereo bit should be 1");
        assert_eq!(b & 0x3, 0, "framing code should be 0");
    }

    #[test]
    fn rejects_non_48k() {
        let mut p = CodecParameters::audio(CodecId::new("opus"));
        p.channels = Some(1);
        p.sample_rate = Some(44_100);
        match OpusEncoder::new(&p) {
            Err(Error::Unsupported(_)) => {}
            Err(e) => panic!("expected Unsupported, got {e:?}"),
            Ok(_) => panic!("expected Unsupported, got Ok"),
        }
    }

    #[test]
    fn rejects_more_than_stereo() {
        let mut p = CodecParameters::audio(CodecId::new("opus"));
        p.channels = Some(6);
        p.sample_rate = Some(SAMPLE_RATE);
        match OpusEncoder::new(&p) {
            Err(Error::Unsupported(_)) => {}
            Err(e) => panic!("expected Unsupported, got {e:?}"),
            Ok(_) => panic!("expected Unsupported, got Ok"),
        }
    }

    #[test]
    fn new_celt_only_fb_accepts_48k_mono() {
        let mut p = CodecParameters::audio(CodecId::new("opus"));
        p.channels = Some(1);
        p.sample_rate = Some(SAMPLE_RATE);
        assert!(OpusEncoder::new_celt_only_full_band(&p).is_ok());
    }

    #[test]
    fn new_celt_only_fb_rejects_non_48k() {
        let mut p = CodecParameters::audio(CodecId::new("opus"));
        p.channels = Some(1);
        p.sample_rate = Some(16_000);
        match OpusEncoder::new_celt_only_full_band(&p) {
            Err(Error::Unsupported(_)) => {}
            Err(e) => panic!("expected Unsupported, got {e:?}"),
            Ok(_) => panic!("expected Unsupported, got Ok"),
        }
    }

    #[test]
    fn mono_encoder_produces_toc_byte() {
        let mut p = CodecParameters::audio(CodecId::new("opus"));
        p.channels = Some(1);
        p.sample_rate = Some(SAMPLE_RATE);
        let mut enc = OpusEncoder::new(&p).unwrap();
        // Feed one frame of silence.
        let bytes = vec![0u8; OPUS_FRAME_SAMPLES * 2];
        let frame = Frame::Audio(AudioFrame {
            format: SampleFormat::S16,
            channels: 1,
            sample_rate: SAMPLE_RATE,
            samples: OPUS_FRAME_SAMPLES as u32,
            pts: None,
            time_base: TimeBase::new(1, SAMPLE_RATE as i64),
            data: vec![bytes],
        });
        enc.send_frame(&frame).unwrap();
        let pkt = enc.receive_packet().unwrap();
        assert!(!pkt.data.is_empty(), "packet must contain TOC + bitstream");
        let toc = pkt.data[0];
        assert_eq!(toc >> 3, 31, "config should be 31");
        assert_eq!((toc >> 2) & 1, 0, "mono → stereo bit = 0");
        assert_eq!(toc & 0x3, 0, "single-frame packet → code 0");
    }
}