oxideav-opus 0.0.7

Opus audio codec for oxideav — SILK + CELT decode (mono/stereo), CELT-only full-band encode, SILK-only encode (NB/MB/WB, mono+stereo, 10/20/40/60 ms)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
//! Opus encoder — CELT-only full-band + the full SILK-only config
//! matrix (configs 0..=11), mono and stereo, 10 / 20 / 40 / 60 ms.
//!
//! # Mode selection
//!
//! * [`OpusEncoder::new_celt_only_full_band`] — CELT-only fullband
//!   20 ms (config 31). Accepts 48 kHz mono/stereo input. Stereo is
//!   downmixed to mono before the CELT mono core; see "Supported
//!   inputs" below for the caveat about the stereo TOC bit.
//!
//! * [`SilkEncoder`] exposes one named constructor per (bandwidth,
//!   channels, duration) tuple:
//!   - bandwidth ∈ {NB, MB, WB} (8 / 12 / 16 kHz internal rates),
//!   - channels ∈ {mono, stereo},
//!   - duration ∈ {10, 20, 40, 60} ms.
//!
//!   That gives 24 constructors, covering all 12 SILK-only `config`
//!   values × stereo bit. Each accepts either the SILK internal rate
//!   or 48 kHz; the latter is downsampled by a box-average pre-filter.
//!   40 / 60 ms packets carry 2 / 3 back-to-back 20 ms SILK frame
//!   bodies per RFC 6716 §4.2.4, so they produce a single Opus packet
//!   with framing code 0 (not code-1/2/3 — the SILK frames share one
//!   TOC byte and one range-coder bitstream).
//!
//! [`OpusEncoder::new`] routes by the `CodecParameters::sample_rate`:
//! 48 kHz mono/stereo → CELT-only FB; anything else → `Unsupported`,
//! because switching to SILK invisibly would be a nasty foot-gun for
//! callers that expected 48 kHz output parity. To emit SILK packets,
//! construct a [`SilkEncoder`] explicitly.
//!
//! Hybrid (SILK+CELT) is not implemented on either path.
//!
//! # Packet layout (RFC 6716 §3)
//!
//! ```text
//!   [ TOC byte ] [ CELT bitstream bytes ... ]
//! ```
//!
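//! where the TOC byte is `(config << 3) | (stereo << 2) | code`. On the
//! CELT-only path `config = 31`, `stereo = 0` (the CELT core is mono-only,
//! see below) and `code = 0` (single frame). SILK-only packets use the
//! same layout with `config ∈ 0..=11`, `stereo ∈ {0, 1}` and `code = 0`,
//! followed by the SILK bitstream instead of CELT bytes.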
//!
//! # Supported inputs (CELT-only path)
//!
//! * S16 / S16P / F32 / F32P sample formats.
//! * 48 kHz sample rate only.
//! * Mono (channels = 1) — native path.
//! * Stereo (channels = 2) — **downmixed to mono** before being fed to
//!   the mono-only CELT encoder; the TOC is emitted with `stereo = 0`.
//!   A real CELT stereo path (coupled L/R PVQ with intensity /
//!   dual-stereo) would be needed to honestly advertise `stereo = 1`
//!   in the TOC, and the `oxideav-celt` encoder is mono-only today —
//!   see its module docs. The signal survives and decodes cleanly as
//!   duplicated-mono on both channels; per-channel detail is lost.
//!
//! # Unsupported
//!
//! * Framing codes 1/2/3 (multi-frame packets) — not emitted. 40 / 60
//!   ms SILK packets *are* emitted via RFC §4.2.4's multiple-SILK-
//!   frames-per-Opus-frame mechanism (still code = 0).
//! * CELT 2.5 / 5 / 10 ms frame sizes.
//! * Hybrid (SILK+CELT) mode.
//! * More than 2 channels.

use std::collections::VecDeque;

use oxideav_celt::encoder::{CeltEncoder, FRAME_SAMPLES, SAMPLE_RATE};
use oxideav_core::Encoder;
use oxideav_core::{
    AudioFrame, CodecId, CodecParameters, Error, Frame, Packet, Result, SampleFormat, TimeBase,
};

/// TOC `config` value that selects CELT-only, fullband, 20 ms frames.
const OPUS_CONFIG_CELT_FB_20MS: u8 = 31;

/// Assemble the TOC byte of a single-frame CELT-only fullband 20 ms packet.
///
/// The byte is laid out as `config(5) | stereo(1) | code(2)` (RFC 6716
/// §3.1). `config` is fixed at 31, `stereo` sets the channel bit, and the
/// two `code` bits are left at 0 (one frame per packet).
pub fn build_toc_byte(stereo: bool) -> u8 {
    let config_bits = OPUS_CONFIG_CELT_FB_20MS << 3;
    let stereo_bits = u8::from(stereo) << 2;
    config_bits | stereo_bits
}

/// Number of PCM samples per 20 ms Opus/CELT frame at 48 kHz.
///
/// Used as both the PTS increment and the packet duration for every Opus
/// packet emitted by the CELT-only encoder.
pub const OPUS_FRAME_SAMPLES: usize = 960;

/// CELT-only full-band Opus encoder (48 kHz, 20 ms frames).
///
/// Wraps a mono [`CeltEncoder`]: each input frame is flattened to mono
/// `f32`, handed to the CELT encoder, and every CELT packet that comes back
/// is prefixed with an Opus TOC byte and queued for `receive_packet`.
pub struct OpusEncoder {
    /// Output-stream parameters (after any channel-count adjustments).
    out_params: CodecParameters,
    /// Channel count on the *input* frames (1 or 2). Stereo inputs are
    /// downmixed to mono before hitting the CELT encoder.
    input_channels: u16,
    /// Sample format of the input frames (S16 / S16P / F32 / F32P). Frames
    /// no longer carry this themselves — the encoder caches it from the
    /// stream's `CodecParameters` at construction time.
    input_sample_format: SampleFormat,
    /// The underlying mono CELT encoder.
    celt: CeltEncoder,
    /// Output packet queue (one Opus packet per 20 ms of input).
    output: VecDeque<Packet>,
    /// PTS counter (in 48 kHz samples).
    pts_counter: i64,
}

impl OpusEncoder {
    /// Build a new Opus encoder. Mode selection is purely driven by the
    /// sample rate in `params`: 48 kHz → CELT-only full-band 20 ms. Any
    /// other rate returns `Error::Unsupported`.
    ///
    /// For an explicit, mode-named entry point that keeps the call-site
    /// intent obvious, see [`OpusEncoder::new_celt_only_full_band`].
    pub fn new(params: &CodecParameters) -> Result<Self> {
        let channels = params.channels.unwrap_or(1);
        if channels == 0 || channels > 2 {
            return Err(Error::unsupported(format!(
                "opus encoder: only mono/stereo supported, got {channels}-channel input"
            )));
        }
        let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
        if sr != SAMPLE_RATE {
            return Err(Error::unsupported(format!(
                "opus encoder: input must be 48 kHz (got {sr}); resample before encoding"
            )));
        }

        // Drive the underlying CELT encoder as mono — stereo input is
        // downmixed on the way in. The CELT-mono path is the only one
        // implemented today.
        let mut celt_params = params.clone();
        celt_params.channels = Some(1);
        celt_params.sample_rate = Some(SAMPLE_RATE);
        // CELT consumes the mono F32 buffer we emit from extract_mono_f32.
        // Pin the sample format here so the inner encoder doesn't fall
        // back on the caller's input format (which may be S16 or planar).
        celt_params.sample_format = Some(SampleFormat::F32);
        // CeltEncoder expects its own codec id; clone the whole parameter
        // block and override the id so the inner encoder doesn't reject
        // us for a mismatch.
        celt_params.codec_id = CodecId::new(oxideav_celt::CODEC_ID_STR);
        let celt = CeltEncoder::new(&celt_params)?;

        // Output params: we report the *input* channel count so that the
        // downstream muxer keeps the packet's implied channel layout in
        // sync with what callers asked for. The bitstream body is always
        // a mono CELT frame though — see module docs.
        let mut out_params = params.clone();
        out_params.sample_rate = Some(SAMPLE_RATE);
        out_params.channels = Some(channels);

        let input_sample_format = params.sample_format.unwrap_or(SampleFormat::S16);

        Ok(Self {
            out_params,
            input_channels: channels,
            input_sample_format,
            celt,
            output: VecDeque::new(),
            pts_counter: 0,
        })
    }

    /// Explicit CELT-only full-band (48 kHz, 20 ms) constructor. Equivalent
    /// to [`OpusEncoder::new`] with `params.sample_rate = Some(48_000)`,
    /// but documents the intent at the call site. Returns `Unsupported`
    /// if the caller passed a non-48 kHz rate.
    ///
    /// Channels must be 1 or 2. Stereo input is downmixed to mono — see
    /// the module docs for why.
    pub fn new_celt_only_full_band(params: &CodecParameters) -> Result<Self> {
        let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
        if sr != SAMPLE_RATE {
            return Err(Error::unsupported(format!(
                "opus encoder (CELT-only FB): input must be 48 kHz, got {sr}"
            )));
        }
        Self::new(params)
    }

    /// Pull all pending CELT packets out of the underlying encoder, wrap
    /// each in an Opus TOC byte, and push the resulting Opus packets to
    /// the output queue.
    fn drain_celt(&mut self) -> Result<()> {
        // CeltEncoder is mono-only so stereo_bit is always 0 here.
        let toc = build_toc_byte(false);
        loop {
            match self.celt.receive_packet() {
                Ok(celt_pkt) => {
                    let mut data = Vec::with_capacity(1 + celt_pkt.data.len());
                    data.push(toc);
                    data.extend_from_slice(&celt_pkt.data);
                    let tb = TimeBase::new(1, SAMPLE_RATE as i64);
                    let pts = self.pts_counter;
                    self.pts_counter += OPUS_FRAME_SAMPLES as i64;
                    let pkt = Packet::new(0, tb, data)
                        .with_pts(pts)
                        .with_duration(OPUS_FRAME_SAMPLES as i64);
                    self.output.push_back(pkt);
                }
                Err(Error::NeedMore) => return Ok(()),
                Err(e) => return Err(e),
            }
        }
    }
}

impl Encoder for OpusEncoder {
    fn codec_id(&self) -> &CodecId {
        &self.out_params.codec_id
    }

    fn output_params(&self) -> &CodecParameters {
        &self.out_params
    }

    fn send_frame(&mut self, frame: &Frame) -> Result<()> {
        let audio = match frame {
            Frame::Audio(a) => a,
            _ => {
                return Err(Error::invalid(
                    "opus encoder: expected audio frame, got video",
                ))
            }
        };

        // Flatten the input into a mono f32 buffer regardless of whether
        // the container was mono (passthrough) or stereo (downmix). The
        // input shape (channels + sample format) comes from the encoder's
        // stream params, since the slim AudioFrame no longer carries it.
        let mono = extract_mono_f32(audio, self.input_channels, self.input_sample_format)?;

        // Feed the CELT encoder as a single mono F32 frame.
        let mut bytes = Vec::with_capacity(mono.len() * 4);
        for &s in &mono {
            bytes.extend_from_slice(&s.to_le_bytes());
        }
        let celt_frame = Frame::Audio(AudioFrame {
            samples: mono.len() as u32,
            pts: audio.pts,
            data: vec![bytes],
        });
        self.celt.send_frame(&celt_frame)?;
        self.drain_celt()
    }

    fn receive_packet(&mut self) -> Result<Packet> {
        if let Some(p) = self.output.pop_front() {
            Ok(p)
        } else {
            Err(Error::NeedMore)
        }
    }

    fn flush(&mut self) -> Result<()> {
        self.celt.flush()?;
        self.drain_celt()?;
        Ok(())
    }
}

/// Decode an `AudioFrame`'s sample bytes into a mono `f32` buffer.
///
/// Multi-channel input is downmixed by taking the per-sample mean across
/// channels. Supported layouts, all read as little-endian:
///
/// * `S16` / `F32` — interleaved, read from plane 0.
/// * `S16P` / `F32P` — planar, one plane per channel.
///
/// 16-bit samples are additionally scaled by `1 / 32768`.
///
/// `channels` and `format` are sourced from the encoder's stream params,
/// since the slim `AudioFrame` no longer carries them per-frame.
///
/// # Errors
///
/// * Invalid input — `channels` is 0, a required data plane is missing, or
///   a plane holds fewer bytes than `audio.samples` requires. A frame with
///   no data plane at all is reported here rather than panicking.
/// * Unsupported — `format` is not one of the four layouts above.
fn extract_mono_f32(audio: &AudioFrame, channels: u16, format: SampleFormat) -> Result<Vec<f32>> {
    // Little-endian sample readers; callers always pass exactly-sized slices.
    fn s16_at(b: &[u8]) -> i32 {
        i32::from(i16::from_le_bytes([b[0], b[1]]))
    }
    fn f32_at(b: &[u8]) -> f32 {
        f32::from_le_bytes([b[0], b[1], b[2], b[3]])
    }

    let n = audio.samples as usize;
    let ch = channels as usize;
    if ch == 0 {
        return Err(Error::invalid("opus encoder: 0-channel audio frame"));
    }
    // Divisor that both averages the channels and maps i16 range to ±1.0.
    let s16_scale = ch as f32 * 32768.0;

    let out: Vec<f32> = match format {
        SampleFormat::S16 => {
            // Interleaved S16: every sample frame is `ch` consecutive i16s.
            let Some(bytes) = audio.data.first() else {
                return Err(Error::invalid(
                    "opus encoder: S16 input has no data plane",
                ));
            };
            let needed = n * ch * 2;
            if bytes.len() < needed {
                return Err(Error::invalid(
                    "opus encoder: S16 input shorter than declared sample count",
                ));
            }
            bytes[..needed]
                .chunks_exact(ch * 2)
                .map(|frame| {
                    let acc: i32 = frame.chunks_exact(2).map(s16_at).sum();
                    acc as f32 / s16_scale
                })
                .collect()
        }
        SampleFormat::S16P => {
            // One plane per channel. Mono = plane 0, stereo = two planes.
            let Some(planes) = audio.data.get(..ch) else {
                return Err(Error::invalid("opus encoder: S16P input missing planes"));
            };
            // Validate every plane once up front instead of per sample.
            if planes.iter().any(|plane| plane.len() < n * 2) {
                return Err(Error::invalid(
                    "opus encoder: S16P plane shorter than declared sample count",
                ));
            }
            (0..n)
                .map(|i| {
                    let off = i * 2;
                    let acc: i32 = planes.iter().map(|p| s16_at(&p[off..off + 2])).sum();
                    acc as f32 / s16_scale
                })
                .collect()
        }
        SampleFormat::F32 => {
            // Interleaved F32: every sample frame is `ch` consecutive f32s.
            let Some(bytes) = audio.data.first() else {
                return Err(Error::invalid(
                    "opus encoder: F32 input has no data plane",
                ));
            };
            let needed = n * ch * 4;
            if bytes.len() < needed {
                return Err(Error::invalid(
                    "opus encoder: F32 input shorter than declared sample count",
                ));
            }
            bytes[..needed]
                .chunks_exact(ch * 4)
                .map(|frame| {
                    // Explicit fold from +0.0 keeps the summation order and
                    // start value of a plain accumulator loop.
                    let acc = frame
                        .chunks_exact(4)
                        .map(f32_at)
                        .fold(0f32, |sum, s| sum + s);
                    acc / ch as f32
                })
                .collect()
        }
        SampleFormat::F32P => {
            let Some(planes) = audio.data.get(..ch) else {
                return Err(Error::invalid("opus encoder: F32P input missing planes"));
            };
            // Validate every plane once up front instead of per sample.
            if planes.iter().any(|plane| plane.len() < n * 4) {
                return Err(Error::invalid(
                    "opus encoder: F32P plane shorter than declared sample count",
                ));
            }
            (0..n)
                .map(|i| {
                    let off = i * 4;
                    let acc = planes
                        .iter()
                        .map(|p| f32_at(&p[off..off + 4]))
                        .fold(0f32, |sum, s| sum + s);
                    acc / ch as f32
                })
                .collect()
        }
        other => {
            return Err(Error::unsupported(format!(
                "opus encoder: sample format {:?} not supported (use S16 / S16P / F32 / F32P)",
                other
            )));
        }
    };
    // Sanity: the CELT encoder always consumes `FRAME_SAMPLES` (960) per
    // frame. We don't enforce `n == FRAME_SAMPLES` here because the
    // underlying CELT encoder buffers up to a frame boundary internally
    // — but we do surface any non-20-ms chunking downstream as Unsupported
    // there. The caller is free to send any number of samples per frame
    // as long as the aggregate ends on a frame boundary before `flush()`.
    let _ = FRAME_SAMPLES;
    Ok(out)
}

/// Build an [`OpusEncoder`] from `params` and return it as a boxed
/// [`Encoder`] trait object.
///
/// # Errors
///
/// Propagates any error from [`OpusEncoder::new`].
pub fn make_encoder(params: &CodecParameters) -> Result<Box<dyn Encoder>> {
    let encoder = OpusEncoder::new(params)?;
    Ok(Box::new(encoder))
}

// ---------------------------------------------------------------------
// SILK encoder — NB / MB / WB, mono + stereo, 10 / 20 / 40 / 60 ms.
// ---------------------------------------------------------------------

/// `config` field value for SILK-only, narrowband, 20 ms frames (RFC 6716
/// §3.1 Table 2).
pub const OPUS_CONFIG_SILK_NB_20MS: u8 = 1;
/// `config` field value for SILK-only, mediumband, 20 ms frames (RFC 6716
/// §3.1 Table 2).
pub const OPUS_CONFIG_SILK_MB_20MS: u8 = 5;
/// `config` field value for SILK-only, wideband, 20 ms frames (RFC 6716
/// §3.1 Table 2).
pub const OPUS_CONFIG_SILK_WB_20MS: u8 = 9;

/// Number of PCM samples per 20 ms SILK NB frame at the internal 8 kHz
/// rate (8 000 Hz × 0.020 s).
pub const SILK_NB_FRAME_SAMPLES_INTERNAL: usize = 160;
/// Samples per 20 ms SILK MB frame at the internal 12 kHz rate
/// (12 000 Hz × 0.020 s).
pub const SILK_MB_FRAME_SAMPLES_INTERNAL: usize = 240;
/// Samples per 20 ms SILK WB frame at the internal 16 kHz rate
/// (16 000 Hz × 0.020 s).
pub const SILK_WB_FRAME_SAMPLES_INTERNAL: usize = 320;

/// Number of PCM samples per 20 ms frame at the Opus output rate of
/// 48 kHz (for PTS accounting).
pub const SILK_FRAME_SAMPLES_48K: usize = 960;

/// Internal (SILK) sample rate for NB, in Hz.
pub const SILK_NB_RATE: u32 = 8_000;
/// Internal (SILK) sample rate for MB, in Hz.
pub const SILK_MB_RATE: u32 = 12_000;
/// Internal (SILK) sample rate for WB, in Hz.
pub const SILK_WB_RATE: u32 = 16_000;

/// TOC byte for a single-frame SILK-only narrowband 20 ms packet (config 1).
///
/// Bit layout per RFC 6716 §3.1 is `config(5) | stereo(1) | code(2)`; the
/// `code` bits are left at 0.
pub fn build_silk_nb_20ms_toc(stereo: bool) -> u8 {
    let config_bits = OPUS_CONFIG_SILK_NB_20MS << 3;
    config_bits | (u8::from(stereo) << 2)
}

/// TOC byte for a single-frame SILK-only mediumband 20 ms packet (config 5).
///
/// Bit layout per RFC 6716 §3.1 is `config(5) | stereo(1) | code(2)`; the
/// `code` bits are left at 0.
pub fn build_silk_mb_20ms_toc(stereo: bool) -> u8 {
    let config_bits = OPUS_CONFIG_SILK_MB_20MS << 3;
    config_bits | (u8::from(stereo) << 2)
}

/// TOC byte for a single-frame SILK-only wideband 20 ms packet (config 9).
///
/// Bit layout per RFC 6716 §3.1 is `config(5) | stereo(1) | code(2)`; the
/// `code` bits are left at 0.
pub fn build_silk_wb_20ms_toc(stereo: bool) -> u8 {
    let config_bits = OPUS_CONFIG_SILK_WB_20MS << 3;
    config_bits | (u8::from(stereo) << 2)
}

/// Opus audio bandwidth for a SILK-only mode.
///
/// Each variant corresponds to one SILK internal sample rate and one
/// 20 ms frame length; see `internal_rate` and `frame_samples_20ms`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum SilkBw {
    /// Narrowband — 8 kHz internal rate.
    Nb,
    /// Mediumband — 12 kHz internal rate.
    Mb,
    /// Wideband — 16 kHz internal rate.
    Wb,
}

impl SilkBw {
    /// SILK internal sample rate for this bandwidth, in Hz.
    fn internal_rate(self) -> u32 {
        match self {
            Self::Nb => SILK_NB_RATE,
            Self::Mb => SILK_MB_RATE,
            Self::Wb => SILK_WB_RATE,
        }
    }

    /// Length of one 20 ms SILK frame, counted in internal-rate samples.
    fn frame_samples_20ms(self) -> usize {
        match self {
            Self::Nb => SILK_NB_FRAME_SAMPLES_INTERNAL,
            Self::Mb => SILK_MB_FRAME_SAMPLES_INTERNAL,
            Self::Wb => SILK_WB_FRAME_SAMPLES_INTERNAL,
        }
    }
}

/// Concrete SILK mode this `SilkEncoder` instance emits.
///
/// A SILK mode is the tuple of (bandwidth, channels, duration). Together
/// they fix the TOC `config` field (0..=11) plus the stereo bit. The
/// decoder's frame-size mapping is in RFC 6716 Table 2.
#[derive(Copy, Clone, Debug)]
struct SilkMode {
    /// Audio bandwidth; selects the SILK internal sample rate.
    bw: SilkBw,
    /// `true` for two-channel input (sets the TOC stereo bit).
    stereo: bool,
    /// Opus frame duration in milliseconds: 10, 20, 40, or 60.
    duration_ms: u32,
}

impl SilkMode {
    fn new(bw: SilkBw, stereo: bool, duration_ms: u32) -> Self {
        debug_assert!(matches!(duration_ms, 10 | 20 | 40 | 60));
        Self {
            bw,
            stereo,
            duration_ms,
        }
    }
    /// RFC 6716 Table 2 `config` field for this mode.
    fn config(self) -> u8 {
        // Each bandwidth occupies a block of 4 configs: NB=0..=3,
        // MB=4..=7, WB=8..=11. Within each block the order is
        // 10/20/40/60 ms.
        let base = match self.bw {
            SilkBw::Nb => 0u8,
            SilkBw::Mb => 4,
            SilkBw::Wb => 8,
        };
        let offset = match self.duration_ms {
            10 => 0,
            20 => 1,
            40 => 2,
            60 => 3,
            _ => unreachable!(),
        };
        base + offset
    }
    fn toc_byte(self) -> u8 {
        let stereo_bit: u8 = if self.stereo { 1 } else { 0 };
        (self.config() << 3) | (stereo_bit << 2)
    }
    fn internal_rate(self) -> u32 {
        self.bw.internal_rate()
    }
    /// Number of 20 ms SILK frames packed into this Opus frame per
    /// RFC §4.2.4. 10 ms = 1 (half-length), 20 ms = 1, 40 ms = 2,
    /// 60 ms = 3.
    fn silk_frames_per_packet(self) -> usize {
        match self.duration_ms {
            10 | 20 => 1,
            40 => 2,
            60 => 3,
            _ => unreachable!(),
        }
    }
    /// Number of sub-frames in each embedded SILK frame (2 for 10 ms,
    /// 4 for 20/40/60 ms).
    fn subframes_per_silk_frame(self) -> usize {
        if self.duration_ms == 10 {
            2
        } else {
            4
        }
    }
    /// Total internal-rate samples carried by this Opus frame.
    fn frame_samples_internal(self) -> usize {
        let per_silk = match self.duration_ms {
            10 => self.bw.frame_samples_20ms() / 2,
            _ => self.bw.frame_samples_20ms(),
        };
        per_silk * self.silk_frames_per_packet()
    }
    /// Samples per *embedded* SILK frame at the internal rate (one of
    /// the 1/2/3 blocks that make up a 10/20/40/60 ms Opus frame).
    fn samples_per_silk_frame(self) -> usize {
        self.frame_samples_internal() / self.silk_frames_per_packet()
    }
    /// PCM samples per Opus frame at the 48 kHz output rate (for PTS
    /// accounting).
    fn frame_samples_48k(self) -> usize {
        match self.duration_ms {
            10 => 480,
            20 => 960,
            40 => 1920,
            60 => 2880,
            _ => unreachable!(),
        }
    }
    fn input_channels(self) -> u16 {
        if self.stereo {
            2
        } else {
            1
        }
    }
    fn is_stereo(self) -> bool {
        self.stereo
    }
    /// Bytes of range-encoder storage to allocate per packet. Sized so
    /// the MVP per-sample nibble carrier fits with headroom; stereo
    /// doubles the mono budget. 40 / 60 ms packets carry 2 / 3 back-to-
    /// back SILK frame bodies, so the budget scales linearly.
    fn buffer_bytes(self) -> u32 {
        let samples = self.frame_samples_internal();
        // ~17 bits per sample worst-case (nibble+nibble + sign), plus
        // headers. Round up to a 64-byte multiple with 2× headroom.
        let base = (samples * 17) / 8 + 128;
        let doubled = if self.is_stereo() { base * 2 } else { base };
        doubled.next_multiple_of(64).max(384) as u32
    }
    /// 48 kHz → internal-rate downsample ratio (integer).
    fn downsample_ratio(self) -> usize {
        (SAMPLE_RATE / self.internal_rate()) as usize
    }
}

/// SILK-mode Opus encoder — covers the full SILK-only config matrix
/// (configs 0..=11), mono and stereo, 10 / 20 / 40 / 60 ms frames.
///
/// Emits a TOC byte matching the configured mode followed by the SILK
/// bitstream described in [`crate::silk::encoder`]. Accepts either the
/// SILK internal rate (8 kHz / 12 kHz / 16 kHz for NB / MB / WB) or the
/// 48 kHz Opus output rate; non-internal input is downsampled by a
/// simple box-average pre-filter.
///
/// Named entry points (one per (bandwidth, channels, duration) tuple):
///
/// * 20 ms mono: [`SilkEncoder::new_nb_mono_20ms`], `new_mb_mono_20ms`,
///   `new_wb_mono_20ms` (configs 1, 5, 9).
/// * 20 ms stereo: [`SilkEncoder::new_nb_stereo_20ms`],
///   `new_mb_stereo_20ms`, `new_wb_stereo_20ms` (configs 1/5/9 + stereo
///   bit). Runs a mid/side pair of
///   [`crate::silk::encoder::SilkFrameEncoder`]s and emits the
///   RFC §4.2.7.1 prediction header.
/// * 10 ms mono + stereo: configs 0, 4, 8 (half the sub-frame count
///   per embedded SILK frame).
/// * 40 ms mono + stereo: configs 2, 6, 10 — packet carries 2 back-to-
///   back 20 ms SILK frame bodies per RFC §4.2.4.
/// * 60 ms mono + stereo: configs 3, 7, 11 — 3 back-to-back 20 ms SILK
///   frame bodies.
///
/// Round-trip SNR > 20 dB on speech-like input through the crate's own
/// SILK decoder for every 20 ms mode (see `encoder_roundtrip.rs`).
/// 10 ms frames lose a bit of first-frame SNR because the LPC history
/// starts cold; 40/60 ms frames match the 20 ms bar because each
/// embedded SILK frame carries its own LPC history.
///
/// Out of scope for this pass: voiced/LTP path, LBRR (redundancy),
/// Hybrid.
pub struct SilkEncoder {
    /// Codec parameters describing the output stream.
    out_params: CodecParameters,
    /// The (bandwidth, channels, duration) mode this encoder emits.
    mode: SilkMode,
    /// Per-frame SILK encoder for the mid (or mono) channel.
    silk_mid: crate::silk::encoder::SilkFrameEncoder,
    /// Side-channel encoder (stereo only).
    silk_side: Option<crate::silk::encoder::SilkFrameEncoder>,
    /// Pending internal-rate samples. For stereo, interleaved L/R.
    pending_internal: VecDeque<f32>,
    /// Expected input sample rate (internal or 48 kHz).
    input_sample_rate: u32,
    /// Sample format of the input frames; cached from stream params since
    /// the slim `AudioFrame` no longer carries it.
    input_sample_format: SampleFormat,
    /// Output packet queue.
    output: VecDeque<Packet>,
    /// PTS counter in 48 kHz samples.
    pts_counter: i64,
}

impl SilkEncoder {
    // ---- 20 ms mono + stereo, all 3 bandwidths ---------------------

    /// SILK NB mono 20 ms (config 1). Input: 8 kHz or 48 kHz mono.
    pub fn new_nb_mono_20ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Nb, false, 20);
        Self::new_mode(params, mode)
    }

    /// SILK MB mono 20 ms (config 5). Input: 12 kHz or 48 kHz mono.
    pub fn new_mb_mono_20ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Mb, false, 20);
        Self::new_mode(params, mode)
    }

    /// SILK WB mono 20 ms (config 9). Input: 16 kHz or 48 kHz mono.
    pub fn new_wb_mono_20ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Wb, false, 20);
        Self::new_mode(params, mode)
    }

    /// SILK NB stereo 20 ms (config 1 + stereo bit). Input: 8 kHz or
    /// 48 kHz stereo, interleaved L/R. Packets are mid/side coded and
    /// carry the stereo prediction header from RFC §4.2.7.1.
    pub fn new_nb_stereo_20ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Nb, true, 20);
        Self::new_mode(params, mode)
    }

    /// SILK MB stereo 20 ms (config 5 + stereo bit).
    pub fn new_mb_stereo_20ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Mb, true, 20);
        Self::new_mode(params, mode)
    }

    /// SILK WB stereo 20 ms (config 9 + stereo bit).
    pub fn new_wb_stereo_20ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Wb, true, 20);
        Self::new_mode(params, mode)
    }

    // ---- 10 ms mono + stereo, all 3 bandwidths ---------------------

    /// SILK NB mono 10 ms (config 0). Input: 8 kHz or 48 kHz mono.
    pub fn new_nb_mono_10ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Nb, false, 10);
        Self::new_mode(params, mode)
    }

    /// SILK MB mono 10 ms (config 4). Input: 12 kHz or 48 kHz mono.
    pub fn new_mb_mono_10ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Mb, false, 10);
        Self::new_mode(params, mode)
    }

    /// SILK WB mono 10 ms (config 8). Input: 16 kHz or 48 kHz mono.
    pub fn new_wb_mono_10ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Wb, false, 10);
        Self::new_mode(params, mode)
    }

    /// SILK NB stereo 10 ms (config 0 + stereo bit).
    pub fn new_nb_stereo_10ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Nb, true, 10);
        Self::new_mode(params, mode)
    }

    /// SILK MB stereo 10 ms (config 4 + stereo bit).
    pub fn new_mb_stereo_10ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Mb, true, 10);
        Self::new_mode(params, mode)
    }

    /// SILK WB stereo 10 ms (config 8 + stereo bit).
    pub fn new_wb_stereo_10ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Wb, true, 10);
        Self::new_mode(params, mode)
    }

    // ---- 40 ms mono + stereo, all 3 bandwidths ---------------------
    //
    // 40 ms Opus frames contain 2 back-to-back 20 ms SILK frames per
    // RFC §4.2.4. The header VAD/LBRR flags span both SILK frames.

    /// SILK NB mono 40 ms (config 2).
    pub fn new_nb_mono_40ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Nb, false, 40);
        Self::new_mode(params, mode)
    }

    /// SILK MB mono 40 ms (config 6).
    pub fn new_mb_mono_40ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Mb, false, 40);
        Self::new_mode(params, mode)
    }

    /// SILK WB mono 40 ms (config 10).
    pub fn new_wb_mono_40ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Wb, false, 40);
        Self::new_mode(params, mode)
    }

    /// SILK NB stereo 40 ms (config 2 + stereo bit).
    pub fn new_nb_stereo_40ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Nb, true, 40);
        Self::new_mode(params, mode)
    }

    /// SILK MB stereo 40 ms (config 6 + stereo bit).
    pub fn new_mb_stereo_40ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Mb, true, 40);
        Self::new_mode(params, mode)
    }

    /// SILK WB stereo 40 ms (config 10 + stereo bit).
    pub fn new_wb_stereo_40ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Wb, true, 40);
        Self::new_mode(params, mode)
    }

    // ---- 60 ms mono + stereo, all 3 bandwidths ---------------------

    /// SILK NB mono 60 ms (config 3).
    pub fn new_nb_mono_60ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Nb, false, 60);
        Self::new_mode(params, mode)
    }

    /// SILK MB mono 60 ms (config 7).
    pub fn new_mb_mono_60ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Mb, false, 60);
        Self::new_mode(params, mode)
    }

    /// SILK WB mono 60 ms (config 11).
    pub fn new_wb_mono_60ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Wb, false, 60);
        Self::new_mode(params, mode)
    }

    /// SILK NB stereo 60 ms (config 3 + stereo bit).
    pub fn new_nb_stereo_60ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Nb, true, 60);
        Self::new_mode(params, mode)
    }

    /// SILK MB stereo 60 ms (config 7 + stereo bit).
    pub fn new_mb_stereo_60ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Mb, true, 60);
        Self::new_mode(params, mode)
    }

    /// SILK WB stereo 60 ms (config 11 + stereo bit).
    pub fn new_wb_stereo_60ms(params: &CodecParameters) -> Result<Self> {
        let mode = SilkMode::new(SilkBw::Wb, true, 60);
        Self::new_mode(params, mode)
    }

    /// Create a fresh per-channel SILK frame encoder for `mode`, using the
    /// NB / MB / WB bandwidth parameter set that matches `mode.bw` and the
    /// sub-frame count `mode` reports for one embedded SILK frame.
    fn build_frame_encoder(mode: SilkMode) -> crate::silk::encoder::SilkFrameEncoder {
        let bandwidth = match mode.bw {
            SilkBw::Nb => crate::silk::encoder::BandwidthParams::nb(),
            SilkBw::Mb => crate::silk::encoder::BandwidthParams::mb(),
            SilkBw::Wb => crate::silk::encoder::BandwidthParams::wb(),
        };
        crate::silk::encoder::SilkFrameEncoder::new_with_subframes(
            bandwidth,
            mode.subframes_per_silk_frame(),
        )
    }

    /// Shared constructor behind every public `new_*` entry point.
    ///
    /// Checks `params` against `mode`, then assembles the encoder state:
    /// one frame encoder for mid/mono, a second one for the side channel
    /// when `mode` is stereo, and an input queue pre-sized for two Opus
    /// frames. Missing `channels` / `sample_rate` / `sample_format` in
    /// `params` default to the mode's channel count, 48 kHz and S16.
    ///
    /// # Errors
    ///
    /// Returns an "unsupported" error when the channel count does not
    /// match the mode, or when the sample rate is neither the mode's
    /// internal rate nor 48 kHz.
    fn new_mode(params: &CodecParameters, mode: SilkMode) -> Result<Self> {
        let expected_channels = mode.input_channels();
        let channels = params.channels.unwrap_or(expected_channels);
        if channels != expected_channels {
            return Err(Error::unsupported(format!(
                "SILK encoder: {mode:?} expects {expected_channels}-channel input, got {channels} channels"
            )));
        }

        let internal = mode.internal_rate();
        let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
        if !(sr == internal || sr == SAMPLE_RATE) {
            return Err(Error::unsupported(format!(
                "SILK encoder: {mode:?} expects {internal} Hz or 48 kHz input, got {sr} Hz"
            )));
        }

        let stereo = mode.is_stereo();
        let silk_mid = Self::build_frame_encoder(mode);
        let silk_side = stereo.then(|| Self::build_frame_encoder(mode));

        // Packets are always stamped on the 48 kHz Opus clock, whatever
        // rate the caller feeds in.
        let out_params = {
            let mut advertised = params.clone();
            advertised.sample_rate = Some(SAMPLE_RATE);
            advertised.channels = Some(expected_channels);
            advertised
        };

        // Queue capacity: two Opus frames' worth of (interleaved) samples.
        let channel_factor = if stereo { 2 } else { 1 };
        let queue_capacity = mode.frame_samples_internal() * channel_factor * 2;

        Ok(Self {
            out_params,
            mode,
            silk_mid,
            silk_side,
            pending_internal: VecDeque::with_capacity(queue_capacity),
            input_sample_rate: sr,
            input_sample_format: params.sample_format.unwrap_or(SampleFormat::S16),
            output: VecDeque::new(),
            pts_counter: 0,
        })
    }

    /// Encode every complete Opus frame currently sitting in
    /// `pending_internal` and append the resulting packets to `output`.
    /// Samples that do not fill a whole frame stay queued.
    ///
    /// For stereo modes the queue holds interleaved L/R samples, which are
    /// split into separate left / right buffers before encoding.
    fn drain_frames(&mut self) -> Result<()> {
        let stereo = self.mode.is_stereo();
        let frame_len = self.mode.frame_samples_internal();
        let items_needed = if stereo { frame_len * 2 } else { frame_len };

        while self.pending_internal.len() >= items_needed {
            // Take exactly one Opus frame off the front of the queue.
            let block: Vec<f32> = self.pending_internal.drain(..items_needed).collect();
            let pkt = if stereo {
                let (left, right): (Vec<f32>, Vec<f32>) =
                    block.chunks_exact(2).map(|lr| (lr[0], lr[1])).unzip();
                self.encode_one_stereo_frame(&left, &right)?
            } else {
                self.encode_one_mono_frame(&block)?
            };
            self.output.push_back(pkt);
        }
        Ok(())
    }

    /// Write the VAD + LBRR flag header that opens every Opus frame
    /// (RFC §4.2.3 / §4.2.4). Bit order, mirroring libopus `silk_Decode`:
    ///
    /// ```text
    ///   for each internal channel n:
    ///     for each packet frame i:
    ///       vad_flags[n][i] = ec_dec_bit_logp(1)
    ///     lbrr_flag[n]      = ec_dec_bit_logp(1)
    /// ```
    ///
    /// Every VAD bit is written as 1 (active) and every LBRR flag as 0
    /// (no redundancy). In stereo the mid channel's bits precede the side
    /// channel's. A 40 / 60 ms packet writes one VAD bit per embedded
    /// SILK frame, followed by a single LBRR flag for that channel.
    fn emit_shared_header(&self, enc: &mut oxideav_celt::range_encoder::RangeEncoder) {
        let vad_bits = self.mode.silk_frames_per_packet();
        let channel_count = if self.mode.is_stereo() { 2 } else { 1 };
        for _ in 0..channel_count {
            // One VAD = 1 bit per embedded SILK frame ...
            (0..vad_bits).for_each(|_| {
                enc.encode_bit_logp(true, 1);
            });
            // ... then the channel's single LBRR = 0 flag.
            enc.encode_bit_logp(false, 1);
        }
    }

    fn encode_one_mono_frame(&mut self, pcm_internal: &[f32]) -> Result<Packet> {
        debug_assert_eq!(pcm_internal.len(), self.mode.frame_samples_internal());
        let mut re = oxideav_celt::range_encoder::RangeEncoder::new(self.mode.buffer_bytes());

        self.emit_shared_header(&mut re);

        // Emit 1 / 2 / 3 back-to-back SILK frame bodies. For 10 and
        // 20 ms Opus frames this is just one body.
        let per_silk = self.mode.samples_per_silk_frame();
        let n = self.mode.silk_frames_per_packet();
        for i in 0..n {
            let start = i * per_silk;
            self.silk_mid
                .encode_frame_body(&pcm_internal[start..start + per_silk], &mut re)?;
        }

        let body = re
            .done()
            .map_err(|e| Error::other(format!("SILK encoder: {e}")))?;
        let body = strip_trailing_zeros(body);
        self.finish_packet(body)
    }

    fn encode_one_stereo_frame(&mut self, left: &[f32], right: &[f32]) -> Result<Packet> {
        debug_assert_eq!(left.len(), right.len());
        debug_assert_eq!(left.len(), self.mode.frame_samples_internal());
        let mut re = oxideav_celt::range_encoder::RangeEncoder::new(self.mode.buffer_bytes());

        self.emit_shared_header(&mut re);

        // For stereo, the mid/side split is done *per 20 ms SILK sub-
        // frame* because the decoder emits one stereo prediction header
        // + one mid body + one side body per sub-frame (RFC §4.2.4).
        let per_silk = self.mode.samples_per_silk_frame();
        let n = self.mode.silk_frames_per_packet();
        for i in 0..n {
            let start = i * per_silk;
            let lc = &left[start..start + per_silk];
            let rc = &right[start..start + per_silk];

            // Stereo prediction weights (0, 0) for the MVP — see the
            // comment in the 20 ms path.
            let (mid, side) = crate::silk::encoder::stereo_mid_side(lc, rc);
            crate::silk::encoder::encode_stereo_pred_weights(&mut re, [0, 0]);

            // Mid then side body. The decoder reads the mid-only flag
            // only when the side VAD is 0; we emit VAD=1 for the side
            // channel (see `emit_shared_header`), so no extra bit.
            self.silk_mid.encode_frame_body(&mid, &mut re)?;
            let side_enc = self
                .silk_side
                .as_mut()
                .ok_or_else(|| Error::other("SILK stereo encoder: missing side state"))?;
            side_enc.encode_frame_body(&side, &mut re)?;
        }

        let body = re
            .done()
            .map_err(|e| Error::other(format!("SILK encoder: {e}")))?;
        let body = strip_trailing_zeros(body);
        self.finish_packet(body)
    }

    /// Wrap a range-coded `body` into an Opus packet: prepend the mode's
    /// TOC byte, stamp PTS and duration on the 48 kHz clock, and advance
    /// the PTS counter by one Opus frame.
    fn finish_packet(&mut self, body: Vec<u8>) -> Result<Packet> {
        let mut data = Vec::with_capacity(body.len() + 1);
        data.push(self.mode.toc_byte());
        data.extend(body);

        let duration = self.mode.frame_samples_48k() as i64;
        let pts = self.pts_counter;
        self.pts_counter = pts + duration;

        let packet = Packet::new(0, TimeBase::new(1, SAMPLE_RATE as i64), data)
            .with_pts(pts)
            .with_duration(duration);
        Ok(packet)
    }
}

/// Drop trailing zero bytes from a range-encoded buffer, always keeping
/// at least one byte of a non-empty input. The last non-zero byte of the
/// main bitstream plus the optional back-buffer bits fully determine the
/// decoded symbols; trailing zeros are only padding that the CELT range
/// encoder leaves in its allocated storage.
fn strip_trailing_zeros(mut v: Vec<u8>) -> Vec<u8> {
    // Keep everything up to and including the last non-zero byte; an
    // all-zero buffer keeps its first byte, an empty one stays empty.
    let keep = v
        .iter()
        .rposition(|&byte| byte != 0)
        .map_or(1, |last_nonzero| last_nonzero + 1)
        .min(v.len());
    v.truncate(keep);
    v
}

impl Encoder for SilkEncoder {
    fn codec_id(&self) -> &CodecId {
        &self.out_params.codec_id
    }

    fn output_params(&self) -> &CodecParameters {
        &self.out_params
    }

    fn send_frame(&mut self, frame: &Frame) -> Result<()> {
        let audio = match frame {
            Frame::Audio(a) => a,
            _ => {
                return Err(Error::invalid(
                    "SILK encoder: expected audio frame, got video",
                ))
            }
        };

        // Extract f32 samples. Mono modes feed `extract_mono_f32`; the
        // stereo mode keeps per-channel planes interleaved so the
        // caller-side mid/side split stays bit-exact. Channel count and
        // sample format come from the encoder's stream params, since the
        // slim `AudioFrame` no longer carries them per-frame.
        let internal_items_per_sample = if self.mode.is_stereo() { 2 } else { 1 };
        let internal_samples: Vec<f32> = if self.mode.is_stereo() {
            let stereo = extract_stereo_f32(audio, self.input_sample_format)?;
            if self.input_sample_rate == self.mode.internal_rate() {
                stereo
            } else {
                downsample_box_interleaved(&stereo, self.mode.downsample_ratio(), 2)
            }
        } else {
            let mono =
                extract_mono_f32(audio, self.mode.input_channels(), self.input_sample_format)?;
            if self.input_sample_rate == self.mode.internal_rate() {
                mono
            } else {
                downsample_box(&mono, self.mode.downsample_ratio())
            }
        };

        debug_assert_eq!(
            internal_items_per_sample * (internal_samples.len() / internal_items_per_sample),
            internal_samples.len()
        );
        self.pending_internal.extend(&internal_samples);
        self.drain_frames()
    }

    fn receive_packet(&mut self) -> Result<Packet> {
        if let Some(p) = self.output.pop_front() {
            Ok(p)
        } else {
            Err(Error::NeedMore)
        }
    }

    fn flush(&mut self) -> Result<()> {
        if !self.pending_internal.is_empty() {
            let per_frame_items =
                self.mode.frame_samples_internal() * (if self.mode.is_stereo() { 2 } else { 1 });
            while self.pending_internal.len() % per_frame_items != 0 {
                self.pending_internal.push_back(0.0);
            }
            self.drain_frames()?;
        }
        Ok(())
    }
}

/// Extract an interleaved L/R f32 buffer from an `AudioFrame`. Supports
/// the same sample formats as [`extract_mono_f32`] but returns
/// `samples * 2` floats, preserving per-channel detail.
///
/// `format` comes from the encoder's stream params, since the slim
/// `AudioFrame` no longer carries it per-frame. Stereo channel count is
/// implied by the SILK stereo mode that calls this helper.
fn extract_stereo_f32(audio: &AudioFrame, format: SampleFormat) -> Result<Vec<f32>> {
    let n = audio.samples as usize;
    let mut out = vec![0f32; n * 2];
    match format {
        SampleFormat::S16 => {
            let bytes = &audio.data[0];
            let needed = n * 2 * 2;
            if bytes.len() < needed {
                return Err(Error::invalid(
                    "SILK stereo encoder: S16 input shorter than declared sample count",
                ));
            }
            for i in 0..n {
                for c in 0..2 {
                    let off = (i * 2 + c) * 2;
                    let s = i16::from_le_bytes([bytes[off], bytes[off + 1]]);
                    out[i * 2 + c] = s as f32 / 32768.0;
                }
            }
        }
        SampleFormat::S16P => {
            if audio.data.len() < 2 {
                return Err(Error::invalid("SILK stereo encoder: S16P missing planes"));
            }
            for i in 0..n {
                for c in 0..2 {
                    let plane = &audio.data[c];
                    if plane.len() < n * 2 {
                        return Err(Error::invalid(
                            "SILK stereo encoder: S16P plane shorter than declared sample count",
                        ));
                    }
                    let off = i * 2;
                    let s = i16::from_le_bytes([plane[off], plane[off + 1]]);
                    out[i * 2 + c] = s as f32 / 32768.0;
                }
            }
        }
        SampleFormat::F32 => {
            let bytes = &audio.data[0];
            let needed = n * 2 * 4;
            if bytes.len() < needed {
                return Err(Error::invalid(
                    "SILK stereo encoder: F32 input shorter than declared sample count",
                ));
            }
            for i in 0..n {
                for c in 0..2 {
                    let off = (i * 2 + c) * 4;
                    out[i * 2 + c] = f32::from_le_bytes([
                        bytes[off],
                        bytes[off + 1],
                        bytes[off + 2],
                        bytes[off + 3],
                    ]);
                }
            }
        }
        SampleFormat::F32P => {
            if audio.data.len() < 2 {
                return Err(Error::invalid("SILK stereo encoder: F32P missing planes"));
            }
            for i in 0..n {
                for c in 0..2 {
                    let plane = &audio.data[c];
                    if plane.len() < n * 4 {
                        return Err(Error::invalid(
                            "SILK stereo encoder: F32P plane shorter than declared sample count",
                        ));
                    }
                    let off = i * 4;
                    out[i * 2 + c] = f32::from_le_bytes([
                        plane[off],
                        plane[off + 1],
                        plane[off + 2],
                        plane[off + 3],
                    ]);
                }
            }
        }
        other => {
            return Err(Error::unsupported(format!(
                "SILK stereo encoder: sample format {other:?} not supported (use S16 / S16P / F32 / F32P)"
            )));
        }
    }
    Ok(out)
}

/// Interleaved (stride-aware) box-average downsampler. `stride` is the
/// number of interleaved channels (1 = mono, 2 = stereo). Identical
/// ratio averaging as [`downsample_box`], but keeps per-channel samples
/// aligned through the decimation.
/// Box-average decimator for interleaved audio. `stride` is the number
/// of interleaved channels (1 = mono, 2 = stereo); every `ratio`
/// consecutive frames are averaged per channel into one output frame, so
/// channels stay aligned. A trailing partial group of frames is dropped,
/// and `ratio <= 1` returns a plain copy of `input`.
fn downsample_box_interleaved(input: &[f32], ratio: usize, stride: usize) -> Vec<f32> {
    if ratio <= 1 {
        return input.to_vec();
    }
    debug_assert_eq!(input.len() % stride, 0);

    let frames_out = (input.len() / stride) / ratio;
    let divisor = ratio as f32;
    let mut out = Vec::with_capacity(frames_out * stride);

    // Each chunk holds `ratio` interleaved frames, i.e. one output frame.
    for group in input.chunks_exact(ratio * stride).take(frames_out) {
        for channel in 0..stride {
            let total = group
                .iter()
                .skip(channel)
                .step_by(stride)
                .fold(0f32, |acc, &sample| acc + sample);
            out.push(total / divisor);
        }
    }
    out
}

/// Average every `ratio` consecutive input samples into one output
/// sample. Cheap & cheerful anti-alias for speech-band content. The
/// output length is `input.len() / ratio` (any trailing partial group
/// is dropped — callers that need strict sample accounting should
/// pass multiples of `ratio`).
/// Box-average decimator: each run of `ratio` consecutive input samples
/// becomes one output sample holding their mean. A cheap anti-alias
/// filter that is adequate for speech-band content. The result has
/// `input.len() / ratio` samples — a trailing partial group is dropped,
/// so callers needing strict sample accounting should pass a multiple
/// of `ratio`. `ratio <= 1` returns a plain copy of `input`.
fn downsample_box(input: &[f32], ratio: usize) -> Vec<f32> {
    if ratio <= 1 {
        return input.to_vec();
    }
    let divisor = ratio as f32;
    input
        .chunks_exact(ratio)
        .map(|group| group.iter().fold(0f32, |acc, &sample| acc + sample) / divisor)
        .collect()
}

// ---------------------------------------------------------------------
// Hybrid encoder — SILK-WB low band + CELT high band, mono + stereo, 10 / 20 ms.
// ---------------------------------------------------------------------

/// TOC `config` field value for Hybrid SWB 10 ms (RFC 6716 Table 2).
pub const OPUS_CONFIG_HYBRID_SWB_10MS: u8 = 12;
/// TOC `config` field value for Hybrid SWB 20 ms (RFC 6716 Table 2).
pub const OPUS_CONFIG_HYBRID_SWB_20MS: u8 = 13;
/// TOC `config` field value for Hybrid FB 10 ms (RFC 6716 Table 2).
pub const OPUS_CONFIG_HYBRID_FB_10MS: u8 = 14;
/// TOC `config` field value for Hybrid FB 20 ms (RFC 6716 Table 2).
pub const OPUS_CONFIG_HYBRID_FB_20MS: u8 = 15;

/// Audio bandwidth for a Hybrid mode encoder. Hybrid SILK always runs as
/// WB (16 kHz internal); only the CELT high-band cutoff differs between
/// SWB (12 kHz) and FB (20 kHz). The variant selects both the CELT
/// `end_band` and the TOC `config` value.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum HybridBandwidth {
    /// Super-Wideband — CELT covers bands 17..19 (8..12 kHz);
    /// TOC configs 12 (10 ms) / 13 (20 ms).
    Swb,
    /// Fullband — CELT covers bands 17..21 (8..20 kHz);
    /// TOC configs 14 (10 ms) / 15 (20 ms).
    Fb,
}

impl HybridBandwidth {
    /// Exclusive upper CELT band (`end_band`) coded at this bandwidth.
    /// The decoder-side counterpart lives in
    /// `oxideav_celt::tables::end_band_for_bandwidth_celt`.
    pub fn celt_end_band(self) -> usize {
        match self {
            Self::Swb => 19,
            Self::Fb => 21,
        }
    }

    /// TOC `config` value of the 20 ms Hybrid mode at this bandwidth
    /// (RFC 6716 Table 2).
    pub fn config_20ms(self) -> u8 {
        match self {
            Self::Swb => OPUS_CONFIG_HYBRID_SWB_20MS,
            Self::Fb => OPUS_CONFIG_HYBRID_FB_20MS,
        }
    }

    /// TOC `config` value of the 10 ms Hybrid mode at this bandwidth
    /// (RFC 6716 Table 2).
    pub fn config_10ms(self) -> u8 {
        match self {
            Self::Swb => OPUS_CONFIG_HYBRID_SWB_10MS,
            Self::Fb => OPUS_CONFIG_HYBRID_FB_10MS,
        }
    }
}

/// CELT `start_band` for every Hybrid configuration (RFC 6716 §4.4).
/// SILK covers up to 8 kHz, which is exactly the band-17 boundary in
/// the CELT band table (`EBAND_5MS[17] = 40` ↔ `40 * 200 Hz/bin = 8 kHz`).
pub const HYBRID_CELT_START_BAND: usize = 17;

/// 48 kHz samples in a 10 ms Hybrid frame (configs 12 / 14):
/// 48 000 Hz × 10 ms = 480, half the 960-sample 20 ms count.
pub const HYBRID_FRAME_SAMPLES_48K_10MS: usize = 480;

/// Total bytes the Hybrid encoder allocates for a mono SWB 20 ms packet
/// (TOC + SILK + CELT). The SILK MVP carrier produces a thicker body
/// than a rate-tight RFC §4.2.7.8 shell coder would (see
/// SilkMode::buffer_bytes — WB 20 ms sizes the SILK side at ~832 bytes),
/// so the full hybrid budget needs to hold both layers comfortably. We
/// pick FB-bigger-than-SWB so the CELT high-band gets more pulse budget
/// for its wider band span (4 vs 2 coded bands).
const HYBRID_PACKET_BYTES_SWB_20MS: usize = 1024;
/// Mono FB 20 ms packet budget; larger than the SWB budget for the
/// wider CELT band span, and equal to the 1275-byte per-frame cap.
const HYBRID_PACKET_BYTES_FB_20MS: usize = 1275;
/// Stereo Hybrid budgets: capped at 1275 bytes / frame because RFC 6716
/// §3.2.1 requires "the length of any individual frame MUST NOT exceed
/// 1275 bytes". libopus rejects oversized frames in `opus_packet_parse`.
/// At 510 kbit/s for 20 ms this is comfortably above the rate at which
/// stereo Hybrid is musically useful (~64 kbit/s). The SILK MVP carrier
/// is the dominant consumer here — see SilkMode::buffer_bytes for the
/// per-channel sizing.
const HYBRID_PACKET_BYTES_SWB_20MS_STEREO: usize = 1275;
/// Stereo FB 20 ms packet budget; same 1275-byte cap as stereo SWB.
const HYBRID_PACKET_BYTES_FB_20MS_STEREO: usize = 1275;

/// 10 ms Hybrid budgets — the SILK 10 ms WB body is roughly half the
/// 20 ms size (2 sub-frames vs 4) but the per-frame range-coder
/// overhead doesn't shrink linearly, so we allocate ~80 % of the 20 ms
/// size rather than exactly half. Both stereo budgets stay clamped to
/// the RFC 6716 §3.2.1 1275-byte hard cap.
const HYBRID_PACKET_BYTES_SWB_10MS: usize = 800;
/// Mono FB 10 ms packet budget (~80 % of the mono FB 20 ms budget).
const HYBRID_PACKET_BYTES_FB_10MS: usize = 1024;
/// Stereo SWB 10 ms packet budget, held at the 1275-byte per-frame cap.
const HYBRID_PACKET_BYTES_SWB_10MS_STEREO: usize = 1275;
/// Stereo FB 10 ms packet budget, held at the 1275-byte per-frame cap.
const HYBRID_PACKET_BYTES_FB_10MS_STEREO: usize = 1275;

/// Assemble the TOC byte of a Hybrid 20 ms packet: `config` in the top
/// five bits, the stereo flag in bit 2, and frame-count code 0 in the
/// low two bits.
pub fn build_hybrid_20ms_toc(bw: HybridBandwidth, stereo: bool) -> u8 {
    let config_bits = bw.config_20ms() << 3;
    let stereo_bits = u8::from(stereo) << 2;
    config_bits | stereo_bits
}

/// Assemble the TOC byte of a Hybrid 10 ms packet: `config` in the top
/// five bits, the stereo flag in bit 2, and frame-count code 0 in the
/// low two bits.
pub fn build_hybrid_10ms_toc(bw: HybridBandwidth, stereo: bool) -> u8 {
    let config_bits = bw.config_10ms() << 3;
    let stereo_bits = u8::from(stereo) << 2;
    config_bits | stereo_bits
}

/// Hybrid (SILK + CELT) Opus encoder for 10 ms and 20 ms frames at SWB
/// or FB — mono and stereo.
///
/// Per RFC 6716 §4.4 the SILK part of a Hybrid frame always runs as WB
/// (16 kHz internal, covering 0..8 kHz) regardless of whether the TOC
/// bandwidth is SWB or FB. The CELT part starts at band 17 (the 8 kHz
/// edge) and covers either 8..12 kHz (SWB) or 8..20 kHz (FB), sharing
/// the same range-coded bitstream as the SILK body — that's what makes
/// Hybrid a single-packet hybrid rather than two independent streams.
///
/// Input: 48 kHz PCM (S16, S16P, F32, F32P), mono or stereo. Stereo
/// inputs feed a mid/side pair into two SILK WB frame encoders for the
/// low band and a dual-stereo CELT encoder for the high band. The
/// encoder downsamples its SILK low-band view to 16 kHz internally; the
/// CELT high-band runs on the original 48 kHz. Output for the 20 ms
/// constructors: one Opus packet per 20 ms frame with TOC config 13
/// (SWB) or 15 (FB).
///
/// 10 ms Hybrid (configs 12 / 14) routes through the same `HybridEncoder`
/// type; pick the duration via [`HybridEncoder::new_swb_mono_10ms`] /
/// `new_fb_mono_10ms` / `new_swb_stereo_10ms` / `new_fb_stereo_10ms`.
/// The 10 ms path uses the CELT LM=2 short-block (480-sample) frame and
/// the SILK 2-sub-frame WB body; otherwise the layout matches the
/// 20 ms variant.
pub struct HybridEncoder {
    /// Codec parameters stored for the output stream.
    out_params: CodecParameters,
    /// TOC bandwidth (SWB or FB); selects the CELT high-band cutoff.
    bw: HybridBandwidth,
    /// True for stereo packets (TOC stereo bit = 1).
    stereo: bool,
    /// Frame duration in 48 kHz samples — 480 for 10 ms (configs 12 /
    /// 14), 960 for 20 ms (configs 13 / 15). Drives both the SILK-side
    /// frame length and the CELT MDCT LM.
    frame_samples_48k: usize,
    /// SILK frame encoder for the mid (or mono) channel — WB 4 sub-frames
    /// for 20 ms, WB 2 sub-frames for 10 ms.
    silk_mid: crate::silk::encoder::SilkFrameEncoder,
    /// SILK frame encoder for the side channel (stereo only).
    silk_side: Option<crate::silk::encoder::SilkFrameEncoder>,
    /// CELT encoder used for the high-band only — mono (1 ch) or
    /// dual-stereo (2 ch) at 48 kHz. Built with
    /// `new_with_frame_samples(_, 960)` for 20 ms and
    /// `new_with_frame_samples(_, 480)` for 10 ms.
    celt: oxideav_celt::encoder::CeltEncoder,
    /// 48 kHz pending PCM samples — interleaved L,R for stereo, plain
    /// mono otherwise.
    pending_48k: VecDeque<f32>,
    /// Sample format of the input frames.
    input_sample_format: SampleFormat,
    /// Output packet queue.
    output: VecDeque<Packet>,
    /// PTS counter in 48 kHz samples.
    pts_counter: i64,
}

impl HybridEncoder {
    /// Create a mono Hybrid encoder that emits super-wideband 20 ms
    /// packets (TOC config 13).
    ///
    /// # Errors
    ///
    /// Fails when `params` requests anything other than 1-channel
    /// 48 kHz input, or when the CELT high-band encoder cannot be built.
    pub fn new_swb_mono_20ms(params: &CodecParameters) -> Result<Self> {
        let (bw, stereo) = (HybridBandwidth::Swb, false);
        Self::new_mode(params, bw, stereo, SILK_FRAME_SAMPLES_48K)
    }

    /// Create a mono Hybrid encoder that emits full-band 20 ms packets
    /// (TOC config 15).
    ///
    /// # Errors
    ///
    /// Fails when `params` requests anything other than 1-channel
    /// 48 kHz input, or when the CELT high-band encoder cannot be built.
    pub fn new_fb_mono_20ms(params: &CodecParameters) -> Result<Self> {
        let (bw, stereo) = (HybridBandwidth::Fb, false);
        Self::new_mode(params, bw, stereo, SILK_FRAME_SAMPLES_48K)
    }

    /// Create a stereo Hybrid encoder that emits super-wideband 20 ms
    /// packets (TOC config 13 with the stereo bit set). Expects 48 kHz
    /// interleaved L/R input.
    ///
    /// # Errors
    ///
    /// Fails when `params` requests anything other than 2-channel
    /// 48 kHz input, or when the CELT high-band encoder cannot be built.
    pub fn new_swb_stereo_20ms(params: &CodecParameters) -> Result<Self> {
        let (bw, stereo) = (HybridBandwidth::Swb, true);
        Self::new_mode(params, bw, stereo, SILK_FRAME_SAMPLES_48K)
    }

    /// Create a stereo Hybrid encoder that emits full-band 20 ms packets
    /// (TOC config 15 with the stereo bit set). Expects 48 kHz
    /// interleaved L/R input.
    ///
    /// # Errors
    ///
    /// Fails when `params` requests anything other than 2-channel
    /// 48 kHz input, or when the CELT high-band encoder cannot be built.
    pub fn new_fb_stereo_20ms(params: &CodecParameters) -> Result<Self> {
        let (bw, stereo) = (HybridBandwidth::Fb, true);
        Self::new_mode(params, bw, stereo, SILK_FRAME_SAMPLES_48K)
    }

    /// Create a mono Hybrid encoder that emits super-wideband 10 ms
    /// packets (TOC config 12). The SILK low band is built with the
    /// 10 ms WB frame encoder and the CELT high band with a 480-sample
    /// (10 ms, LM=2) frame.
    ///
    /// # Errors
    ///
    /// Fails when `params` requests anything other than 1-channel
    /// 48 kHz input, or when the CELT high-band encoder cannot be built.
    pub fn new_swb_mono_10ms(params: &CodecParameters) -> Result<Self> {
        let (bw, stereo) = (HybridBandwidth::Swb, false);
        Self::new_mode(params, bw, stereo, HYBRID_FRAME_SAMPLES_48K_10MS)
    }

    /// Create a mono Hybrid encoder that emits full-band 10 ms packets
    /// (TOC config 14).
    ///
    /// # Errors
    ///
    /// Fails when `params` requests anything other than 1-channel
    /// 48 kHz input, or when the CELT high-band encoder cannot be built.
    pub fn new_fb_mono_10ms(params: &CodecParameters) -> Result<Self> {
        let (bw, stereo) = (HybridBandwidth::Fb, false);
        Self::new_mode(params, bw, stereo, HYBRID_FRAME_SAMPLES_48K_10MS)
    }

    /// Create a stereo Hybrid encoder that emits super-wideband 10 ms
    /// packets (TOC config 12 with the stereo bit set). Expects 48 kHz
    /// interleaved L/R input.
    ///
    /// # Errors
    ///
    /// Fails when `params` requests anything other than 2-channel
    /// 48 kHz input, or when the CELT high-band encoder cannot be built.
    pub fn new_swb_stereo_10ms(params: &CodecParameters) -> Result<Self> {
        let (bw, stereo) = (HybridBandwidth::Swb, true);
        Self::new_mode(params, bw, stereo, HYBRID_FRAME_SAMPLES_48K_10MS)
    }

    /// Create a stereo Hybrid encoder that emits full-band 10 ms packets
    /// (TOC config 14 with the stereo bit set). Expects 48 kHz
    /// interleaved L/R input.
    ///
    /// # Errors
    ///
    /// Fails when `params` requests anything other than 2-channel
    /// 48 kHz input, or when the CELT high-band encoder cannot be built.
    pub fn new_fb_stereo_10ms(params: &CodecParameters) -> Result<Self> {
        let (bw, stereo) = (HybridBandwidth::Fb, true);
        Self::new_mode(params, bw, stereo, HYBRID_FRAME_SAMPLES_48K_10MS)
    }

    /// Shared constructor behind every public `new_*` entry point.
    ///
    /// Validates `params` against the requested layout, then builds the
    /// SILK low-band encoder(s) and the CELT high-band encoder for the
    /// chosen frame duration.
    ///
    /// * `bw` — Hybrid bandwidth stored on the encoder.
    /// * `stereo` — `true` for 2-channel input, `false` for mono.
    /// * `frame_samples_48k` — frame length in 48 kHz samples; must be
    ///   `HYBRID_FRAME_SAMPLES_48K_10MS` or `SILK_FRAME_SAMPLES_48K`
    ///   (only checked in debug builds).
    ///
    /// A missing channel count or sample rate in `params` defaults to the
    /// value this mode expects; a missing sample format defaults to S16.
    ///
    /// # Errors
    ///
    /// Returns an unsupported-error when the channel count does not
    /// match `stereo` or the sample rate differs from `SAMPLE_RATE`, and
    /// propagates any failure from the CELT encoder constructor.
    fn new_mode(
        params: &CodecParameters,
        bw: HybridBandwidth,
        stereo: bool,
        frame_samples_48k: usize,
    ) -> Result<Self> {
        use crate::silk::encoder::SilkFrameEncoder;
        use oxideav_celt::encoder::CeltEncoder;

        debug_assert!(matches!(
            frame_samples_48k,
            HYBRID_FRAME_SAMPLES_48K_10MS | SILK_FRAME_SAMPLES_48K
        ));

        // Channel layout is validated before the sample rate.
        let (want_channels, layout): (u16, &str) = if stereo {
            (2, "stereo")
        } else {
            (1, "mono")
        };
        let channels = params.channels.unwrap_or(want_channels);
        if channels != want_channels {
            return Err(Error::unsupported(format!(
                "Hybrid encoder: {} expects {want_channels}-channel input, got {channels}",
                layout
            )));
        }
        let sr = params.sample_rate.unwrap_or(SAMPLE_RATE);
        if sr != SAMPLE_RATE {
            return Err(Error::unsupported(format!(
                "Hybrid encoder: input must be 48 kHz (got {sr})"
            )));
        }

        // SILK always runs as WB; the frame duration decides between the
        // 20 ms (4 sub-frame) and 10 ms (2 sub-frame) frame encoder. The
        // side-channel encoder only exists for stereo.
        let twenty_ms = frame_samples_48k == SILK_FRAME_SAMPLES_48K;
        let build_silk = || {
            if twenty_ms {
                SilkFrameEncoder::new_wb_20ms()
            } else {
                SilkFrameEncoder::new_wb_10ms()
            }
        };
        let silk_mid = build_silk();
        let silk_side = stereo.then(build_silk);

        // CELT sees the same channel count as the Hybrid input, at
        // 48 kHz F32, and takes its frame size (and thus its LM) from
        // `frame_samples_48k`: 960 → LM=3 (20 ms), 480 → LM=2 (10 ms).
        let celt_params = {
            let mut p = params.clone();
            p.codec_id = CodecId::new(oxideav_celt::CODEC_ID_STR);
            p.sample_format = Some(SampleFormat::F32);
            p.sample_rate = Some(SAMPLE_RATE);
            p.channels = Some(channels);
            p
        };
        let celt = CeltEncoder::new_with_frame_samples(&celt_params, frame_samples_48k)?;

        // Advertised output parameters: the caller's, with rate and
        // channel count made explicit.
        let out_params = {
            let mut p = params.clone();
            p.channels = Some(channels);
            p.sample_rate = Some(SAMPLE_RATE);
            p
        };

        Ok(Self {
            out_params,
            bw,
            stereo,
            frame_samples_48k,
            silk_mid,
            silk_side,
            celt,
            pending_48k: VecDeque::with_capacity(SILK_FRAME_SAMPLES_48K * 4),
            input_sample_format: params.sample_format.unwrap_or(SampleFormat::S16),
            output: VecDeque::new(),
            pts_counter: 0,
        })
    }

    /// Encode a single mono Hybrid frame into one packet.
    ///
    /// `pcm_48k_mono` must hold exactly `self.frame_samples_48k` samples
    /// (checked in debug builds). SILK and CELT share one range encoder,
    /// written in this order:
    ///
    /// 1. SILK shared header: one VAD bit (set) and one LBRR bit (clear).
    /// 2. SILK WB body, fed with the input decimated by 3 (48 → 16 kHz).
    /// 3. CELT high-band body from `HYBRID_CELT_START_BAND` up to the
    ///    end band of the configured bandwidth, using whatever byte
    ///    budget is left after SILK.
    ///
    /// The encoder is then finalised and the TOC byte prepended.
    ///
    /// # Errors
    ///
    /// Propagates failures from the SILK body encoder, the CELT body
    /// encoder, or packet finalisation.
    fn encode_one_frame(&mut self, pcm_48k_mono: &[f32]) -> Result<Packet> {
        use oxideav_celt::range_encoder::RangeEncoder;

        debug_assert_eq!(pcm_48k_mono.len(), self.frame_samples_48k);

        let packet_budget = self.packet_bytes_mono();
        let mut coder = RangeEncoder::new(packet_budget as u32);

        // Shared SILK header for the single internal channel: the frame
        // is flagged active and carries no redundancy.
        coder.encode_bit_logp(true, 1); // VAD
        coder.encode_bit_logp(false, 1); // LBRR

        // Low band: 48 kHz → 16 kHz, then the SILK WB body.
        let low_band_16k = downsample_box(pcm_48k_mono, 3);
        debug_assert_eq!(low_band_16k.len(), self.silk_mid.frame_len());
        self.silk_mid.encode_frame_body(&low_band_16k, &mut coder)?;

        // High band: CELT continues in the same range encoder with the
        // bytes SILK left over.
        let high_band_budget = Self::compute_celt_budget(&coder, packet_budget);
        self.celt.encode_hybrid_body_mono(
            pcm_48k_mono,
            &mut coder,
            HYBRID_CELT_START_BAND,
            self.bw.celt_end_band(),
            high_band_budget,
        )?;

        self.finalize_packet(coder, false)
    }

    /// Encode one Hybrid stereo frame (10 ms or 20 ms). `pcm_48k_lr`
    /// must be exactly `2 * self.frame_samples_48k` interleaved L/R
    /// samples (960 for 10 ms, 1920 for 20 ms).
    ///
    /// Layout matches what the hybrid stereo decoder expects:
    /// 1. Shared SILK VAD + LBRR header for **two** internal channels
    ///    (mid then side: 1 VAD + 1 LBRR per channel = 4 bits total).
    /// 2. Stereo prediction header (RFC §4.2.7.1) — emitted with weights
    ///    of (0, 0) for this MVP, matching the SILK stereo encoder.
    /// 3. Mid SILK WB body, then side SILK WB body — both encoded into
    ///    the same range encoder.
    /// 4. CELT high-band body via `encode_hybrid_body_stereo`
    ///    (`start_band = 17`, `channels = 2`, dual-stereo coupling).
    /// 5. Finalise and prepend the TOC byte (config 12/13/14/15 +
    ///    stereo bit).
    fn encode_one_frame_stereo(&mut self, pcm_48k_lr: &[f32]) -> Result<Packet> {
        let frame_n = self.frame_samples_48k;
        debug_assert_eq!(pcm_48k_lr.len(), 2 * frame_n);

        let bytes = self.packet_bytes_stereo();
        let mut re = oxideav_celt::range_encoder::RangeEncoder::new(bytes as u32);

        // SILK shared header — two internal channels: VAD+LBRR per channel.
        // Order: vad[mid], lbrr[mid], vad[side], lbrr[side]. Mirrors the
        // SilkEncoder::emit_shared_header layout for stereo with one SILK
        // frame per packet.
        re.encode_bit_logp(true, 1); // mid VAD
        re.encode_bit_logp(false, 1); // mid LBRR
        re.encode_bit_logp(true, 1); // side VAD
        re.encode_bit_logp(false, 1); // side LBRR

        // De-interleave to L / R.
        let mut left = Vec::with_capacity(frame_n);
        let mut right = Vec::with_capacity(frame_n);
        for i in 0..frame_n {
            left.push(pcm_48k_lr[2 * i]);
            right.push(pcm_48k_lr[2 * i + 1]);
        }

        // Downsample 48 kHz → 16 kHz, then mid/side split. 320 samples
        // for 20 ms WB, 160 samples for 10 ms WB.
        let l_internal = downsample_box(&left, 3);
        let r_internal = downsample_box(&right, 3);
        debug_assert_eq!(l_internal.len(), self.silk_mid.frame_len());
        debug_assert_eq!(r_internal.len(), self.silk_mid.frame_len());
        let (mid, side) = crate::silk::encoder::stereo_mid_side(&l_internal, &r_internal);

        // SILK stereo prediction header: emit (0, 0) for the MVP — the
        // existing SILK stereo encoder uses the same placeholder.
        crate::silk::encoder::encode_stereo_pred_weights(&mut re, [0, 0]);

        // Mid then side body. Both VAD bits are 1 (set in the header
        // above) so the mid-only flag is implicitly absent.
        self.silk_mid.encode_frame_body(&mid, &mut re)?;
        let side_enc = self
            .silk_side
            .as_mut()
            .ok_or_else(|| Error::other("Hybrid stereo encoder: missing side state"))?;
        side_enc.encode_frame_body(&side, &mut re)?;

        // CELT high-band — dual-stereo, start_band=17.
        let end_band = self.bw.celt_end_band();
        let celt_budget = Self::compute_celt_budget(&re, bytes);
        self.celt.encode_hybrid_body_stereo(
            pcm_48k_lr,
            &mut re,
            HYBRID_CELT_START_BAND,
            end_band,
            celt_budget,
        )?;

        self.finalize_packet(re, true)
    }

    /// Byte budget for one mono packet, looked up from the per-mode
    /// constants by bandwidth and frame duration.
    ///
    /// # Panics
    ///
    /// Panics if `self.frame_samples_48k` is neither the 10 ms nor the
    /// 20 ms frame size.
    fn packet_bytes_mono(&self) -> usize {
        let mode = (self.bw, self.frame_samples_48k);
        match mode {
            (HybridBandwidth::Swb, SILK_FRAME_SAMPLES_48K) => HYBRID_PACKET_BYTES_SWB_20MS,
            (HybridBandwidth::Swb, HYBRID_FRAME_SAMPLES_48K_10MS) => HYBRID_PACKET_BYTES_SWB_10MS,
            (HybridBandwidth::Fb, SILK_FRAME_SAMPLES_48K) => HYBRID_PACKET_BYTES_FB_20MS,
            (HybridBandwidth::Fb, HYBRID_FRAME_SAMPLES_48K_10MS) => HYBRID_PACKET_BYTES_FB_10MS,
            _ => unreachable!("Hybrid encoder: unsupported frame_samples_48k"),
        }
    }

    /// Byte budget for one stereo packet, looked up from the per-mode
    /// `*_STEREO` constants by bandwidth and frame duration.
    ///
    /// # Panics
    ///
    /// Panics if `self.frame_samples_48k` is neither the 10 ms nor the
    /// 20 ms frame size.
    fn packet_bytes_stereo(&self) -> usize {
        let mode = (self.bw, self.frame_samples_48k);
        match mode {
            (HybridBandwidth::Swb, SILK_FRAME_SAMPLES_48K) => HYBRID_PACKET_BYTES_SWB_20MS_STEREO,
            (HybridBandwidth::Swb, HYBRID_FRAME_SAMPLES_48K_10MS) => {
                HYBRID_PACKET_BYTES_SWB_10MS_STEREO
            }
            (HybridBandwidth::Fb, SILK_FRAME_SAMPLES_48K) => HYBRID_PACKET_BYTES_FB_20MS_STEREO,
            (HybridBandwidth::Fb, HYBRID_FRAME_SAMPLES_48K_10MS) => {
                HYBRID_PACKET_BYTES_FB_10MS_STEREO
            }
            _ => unreachable!("Hybrid encoder: unsupported frame_samples_48k"),
        }
    }

    /// Bytes left for the CELT body once everything already written to
    /// `re` is accounted for.
    ///
    /// The bits consumed so far (`re.tell()`) are rounded up to whole
    /// bytes and subtracted from `bytes`. The result never drops below
    /// 16, even when fewer bytes (or none) actually remain.
    fn compute_celt_budget(re: &oxideav_celt::range_encoder::RangeEncoder, bytes: usize) -> usize {
        /// Smallest budget ever handed to the CELT bit allocator.
        const MIN_CELT_BYTES: usize = 16;

        let consumed_bytes = (re.tell() as usize).div_ceil(8);
        match bytes.checked_sub(consumed_bytes) {
            Some(remaining) if remaining > MIN_CELT_BYTES => remaining,
            _ => MIN_CELT_BYTES,
        }
    }

    fn finalize_packet(
        &mut self,
        re: oxideav_celt::range_encoder::RangeEncoder,
        stereo: bool,
    ) -> Result<Packet> {
        let body = re
            .done()
            .map_err(|e| Error::other(format!("Hybrid encoder: {e}")))?;
        let body = strip_trailing_zeros(body);

        let toc = if self.frame_samples_48k == SILK_FRAME_SAMPLES_48K {
            build_hybrid_20ms_toc(self.bw, stereo)
        } else {
            build_hybrid_10ms_toc(self.bw, stereo)
        };
        let mut data = Vec::with_capacity(1 + body.len());
        data.push(toc);
        data.extend_from_slice(&body);

        let tb = TimeBase::new(1, SAMPLE_RATE as i64);
        let pts = self.pts_counter;
        self.pts_counter += self.frame_samples_48k as i64;
        Ok(Packet::new(0, tb, data)
            .with_pts(pts)
            .with_duration(self.frame_samples_48k as i64))
    }

    /// Encode every complete frame currently buffered in `pending_48k`
    /// and append the resulting packets to the output queue.
    ///
    /// A frame is `frame_samples_48k` samples for mono and twice that
    /// (interleaved) for stereo. Samples beyond the last complete frame
    /// stay buffered.
    ///
    /// # Errors
    ///
    /// Stops at the first frame that fails to encode and returns its
    /// error; that frame's samples have already been removed from the
    /// buffer.
    fn drain_frames(&mut self) -> Result<()> {
        let channel_count = if self.stereo { 2 } else { 1 };
        let per_frame = self.frame_samples_48k * channel_count;

        while self.pending_48k.len() >= per_frame {
            let frame: Vec<f32> = self.pending_48k.drain(..per_frame).collect();
            let packet = if self.stereo {
                self.encode_one_frame_stereo(&frame)?
            } else {
                self.encode_one_frame(&frame)?
            };
            self.output.push_back(packet);
        }
        Ok(())
    }
}

impl Encoder for HybridEncoder {
    fn codec_id(&self) -> &CodecId {
        &self.out_params.codec_id
    }

    fn output_params(&self) -> &CodecParameters {
        &self.out_params
    }

    fn send_frame(&mut self, frame: &Frame) -> Result<()> {
        let audio = match frame {
            Frame::Audio(a) => a,
            _ => {
                return Err(Error::invalid(
                    "Hybrid encoder: expected audio frame, got video",
                ))
            }
        };
        if self.stereo {
            let lr = extract_stereo_f32(audio, self.input_sample_format)?;
            self.pending_48k.extend(&lr);
        } else {
            let mono = extract_mono_f32(audio, 1, self.input_sample_format)?;
            self.pending_48k.extend(&mono);
        }
        self.drain_frames()
    }

    fn receive_packet(&mut self) -> Result<Packet> {
        if let Some(p) = self.output.pop_front() {
            Ok(p)
        } else {
            Err(Error::NeedMore)
        }
    }

    fn flush(&mut self) -> Result<()> {
        let per_frame = if self.stereo {
            self.frame_samples_48k * 2
        } else {
            self.frame_samples_48k
        };
        if !self.pending_48k.is_empty() {
            while self.pending_48k.len() % per_frame != 0 {
                self.pending_48k.push_back(0.0);
            }
            self.drain_frames()?;
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Check the three TOC fields of a byte from `build_toc_byte`:
    /// config 31, the expected stereo bit, and framing code 0.
    fn assert_toc_fields(toc: u8, stereo_bit: u8) {
        assert_eq!(toc >> 3, 31, "config should be 31");
        assert_eq!(
            (toc >> 2) & 1,
            stereo_bit,
            "stereo bit should be {stereo_bit}"
        );
        assert_eq!(toc & 0x3, 0, "framing code should be 0");
    }

    /// Fail unless `outcome` is an `Error::Unsupported`.
    fn assert_unsupported<T>(outcome: std::result::Result<T, Error>) {
        match outcome {
            Err(Error::Unsupported(_)) => {}
            Err(e) => panic!("expected Unsupported, got {e:?}"),
            Ok(_) => panic!("expected Unsupported, got Ok"),
        }
    }

    #[test]
    fn toc_byte_mono() {
        assert_toc_fields(build_toc_byte(false), 0);
    }

    #[test]
    fn toc_byte_stereo() {
        assert_toc_fields(build_toc_byte(true), 1);
    }

    #[test]
    fn rejects_non_48k() {
        let mut params = CodecParameters::audio(CodecId::new("opus"));
        params.channels = Some(1);
        params.sample_rate = Some(44_100);
        assert_unsupported(OpusEncoder::new(&params));
    }

    #[test]
    fn rejects_more_than_stereo() {
        let mut params = CodecParameters::audio(CodecId::new("opus"));
        params.channels = Some(6);
        params.sample_rate = Some(SAMPLE_RATE);
        assert_unsupported(OpusEncoder::new(&params));
    }

    #[test]
    fn new_celt_only_fb_accepts_48k_mono() {
        let mut params = CodecParameters::audio(CodecId::new("opus"));
        params.channels = Some(1);
        params.sample_rate = Some(SAMPLE_RATE);
        assert!(OpusEncoder::new_celt_only_full_band(&params).is_ok());
    }

    #[test]
    fn new_celt_only_fb_rejects_non_48k() {
        let mut params = CodecParameters::audio(CodecId::new("opus"));
        params.channels = Some(1);
        params.sample_rate = Some(16_000);
        assert_unsupported(OpusEncoder::new_celt_only_full_band(&params));
    }

    #[test]
    fn mono_encoder_produces_toc_byte() {
        let mut params = CodecParameters::audio(CodecId::new("opus"));
        params.channels = Some(1);
        params.sample_rate = Some(SAMPLE_RATE);
        let mut encoder = OpusEncoder::new(&params).unwrap();

        // One frame of silence: two zero bytes per sample.
        let silence = vec![0u8; OPUS_FRAME_SAMPLES * 2];
        let input = Frame::Audio(AudioFrame {
            samples: OPUS_FRAME_SAMPLES as u32,
            pts: None,
            data: vec![silence],
        });
        encoder.send_frame(&input).unwrap();

        let packet = encoder.receive_packet().unwrap();
        assert!(
            !packet.data.is_empty(),
            "packet must contain TOC + bitstream"
        );
        let toc = packet.data[0];
        assert_eq!(toc >> 3, 31, "config should be 31");
        assert_eq!((toc >> 2) & 1, 0, "mono → stereo bit = 0");
        assert_eq!(toc & 0x3, 0, "single-frame packet → code 0");
    }
}