oxideav_midi/
lib.rs

1//! MIDI — Standard MIDI File (SMF) parser + transport metadata + soft-synth.
2//!
3//! * **[`smf`]** — pure-Rust parser for the Standard MIDI File format
4//!   (Type 0 / 1 / 2). Header (`MThd`) + tracks (`MTrk`) + every common
5//!   channel-voice message, sysex (`F0` / `F7`), and meta event
6//!   (tempo, time signature, key signature, text, marker, end-of-track,
7//!   SMPTE offset, sequencer-specific). Running status is honoured;
8//!   VLQs are bounded to 4 bytes per spec; chunk lengths are validated
9//!   against remaining bytes; total events per file are capped at
10//!   [`smf::MAX_EVENTS_PER_FILE`].
11//! * **[`paths`]** — per-OS SoundFont/SFZ/DLS search paths plus the
12//!   `OXIDEAV_SOUNDFONT_PATH` environment override. `find_soundfonts`
13//!   walks them and returns every instrument-bank file present.
//! * **[`instruments`]** — [`instruments::Instrument`] trait. Three
//!   bank adapters, two shared helper modules, and a fallback:
16//!     * **[`instruments::sf2`]** — full SoundFont 2 RIFF reader +
17//!       voice generator. Walks the `sfbk` form, cross-resolves the
18//!       preset → instrument → zone → sample chain, and renders
19//!       sm24-aware 24-bit PCM at the requested pitch via linear
20//!       interpolation. Honours the volume + modulation DAHDSR
21//!       envelopes, the initial low-pass biquad filter, mod-env →
22//!       pitch / filter routing, exclusive-class drum cuts, and
23//!       native stereo zones.
24//!     * **[`instruments::sfz`]** — text patch reader **plus voice
25//!       generator**. Strips comments, walks `<control>` /
26//!       `<global>` / `<master>` / `<group>` / `<region>` sections,
27//!       flattens inheritance into one fully-resolved opcode map per
28//!       region, and (via
29//!       [`SfzInstrument::open`](instruments::sfz::SfzInstrument::open))
30//!       reads every referenced sample off disk. Voice generation
31//!       decodes the WAV sample bytes, picks the matching region by
32//!       (key, velocity), shifts pitch off `pitch_keycenter` + `tune` +
33//!       `transpose`, and runs a DAHDSR amplitude envelope
34//!       (`ampeg_*`) + vibrato LFO (`lfo01_*`).
35//!     * **[`instruments::dls`]** — DLS (Downloadable Sounds)
36//!       Level 1 + Level 2 RIFF reader **plus voice generator**.
37//!       Walks the `DLS ` form, parses the `colh` / `vers` / `ptbl`
38//!       pool table / `lins-list` instrument table / `wvpl-list`
39//!       wave pool, and surfaces a fully-cross-resolved
40//!       [`DlsBank`](instruments::dls::DlsBank) of instruments →
41//!       regions → wave-pool samples with their `wsmp` loops,
42//!       `wlnk` cue references, and `art1` / `art2` articulation
43//!       connection blocks. `make_voice` resolves the wlnk → ptbl →
44//!       wave-pool entry, decodes the PCM, and plays the sample
45//!       through [`SamplePlayer`](instruments::sample_voice::SamplePlayer).
46//!       `art1`/`art2` connection-block evaluation is round 2.
47//!     * **[`instruments::sample_voice`]** — shared sample-playback
48//!       voice (mono in, mono out) used by both SFZ and DLS. Covers
49//!       DAHDSR amplitude envelope, four loop modes, pitch bend,
50//!       and a vibrato LFO.
51//!     * **[`instruments::wav_pcm`]** — minimal RIFF/WAVE PCM
52//!       decoder used by the SFZ and DLS sample loaders.
53//!     * **[`instruments::tone`]** — sine/triangle/saw/square
54//!       fallback so the synth produces *something* even when no
55//!       on-disk bank is present.
56//! * **[`mixer`]** — polyphonic voice pool (32 voices) with stereo
57//!   mixdown, per-channel volume / pan / sustain pedal handling, and
58//!   oldest-voice preemption when the pool is full. Round 75 adds the
59//!   full RPN 1 / RPN 2 / RPN 5 control surface (channel fine + coarse
60//!   tune + modulation-depth range), CC 1 (mod wheel) → per-voice
61//!   depth, CC 74 (MPE "third dimension") → per-voice timbre, the
62//!   `MpeZone` / `MpeRole` topology built from MCM messages, and
63//!   universal-SysEx-driven master volume / master fine / master
64//!   coarse tuning that sum with per-channel tuning into the
65//!   effective pitch each voice receives.
66//! * **[`scheduler`]** — SMF event scheduler. Merges every track into a
67//!   single time-ordered stream, converts ticks → samples against the
68//!   current tempo + division, and dispatches each event into the
69//!   mixer at the right audio sample. Round 75 wires the Universal
70//!   Real-Time / Non-Real-Time SysEx routing: GM 1 / GM 2 / GM Off
71//!   reset, CA-25 Master Fine / Master Coarse Tuning, Master Volume,
72//!   plus the CC 1 / CC 74 / MPE-MCM channel-CC paths.
73//! * **[`downloader`]** — stub that names a planned default bank
74//!   (TimGM6mb) but currently returns [`Error::Unsupported`].
75//!
76//! The decoder factory ([`make_decoder`]) is registered under codec id
77//! [`CODEC_ID_STR`] = `"midi"`. Round-3 wires SMF events end-to-end:
78//! `send_packet` parses the SMF and primes the scheduler; `receive_frame`
79//! pulls one chunk of stereo PCM ([`FRAME_SAMPLES`] samples per channel
80//! at [`OUTPUT_SAMPLE_RATE`]) until both the event stream and the voice
81//! pool have run dry, then returns [`Error::Eof`].
82//!
83//! Without an instrument bank the decoder uses
84//! [`instruments::tone::ToneInstrument`] — the pure-tone fallback —
85//! so a `.mid` file plays back as audible-but-not-musical sine /
86//! triangle / square waves. To use a real bank, build the decoder by
87//! hand and pass an [`Sf2Instrument`](instruments::sf2::Sf2Instrument)
88//! to [`MidiDecoder::with_instrument`]; the decoder factory wired into
89//! the registry today does not yet plumb a bank-discovery hook.
90
91pub mod downloader;
92pub mod instruments;
93pub mod mixer;
94pub mod paths;
95pub mod scheduler;
96pub mod smf;
97pub mod tuning;
98
99use std::path::{Path, PathBuf};
100use std::sync::Arc;
101
102use oxideav_core::{
103    AudioFrame, CodecCapabilities, CodecId, CodecInfo, CodecParameters, CodecRegistry, Decoder,
104    Error, Frame, Packet, Result,
105};
106
107use crate::instruments::dls::DlsInstrument;
108use crate::instruments::sf2::Sf2Instrument;
109use crate::instruments::sfz::SfzInstrument;
110use crate::instruments::tone::ToneInstrument;
111use crate::instruments::Instrument;
112use crate::mixer::Mixer;
113use crate::scheduler::Scheduler;
114
/// Public codec id string. Matches the aggregator feature name `midi`.
/// Used both when registering the codec and when stamping the id onto
/// each [`MidiDecoder`].
pub const CODEC_ID_STR: &str = "midi";

/// Round-3 audio output sample rate. Hard-coded to 44 100 Hz so the
/// decoder doesn't need a parameter from the caller (the SMF container
/// itself doesn't carry one). Round-4 may wire this through
/// `CodecParameters::sample_rate`.
pub const OUTPUT_SAMPLE_RATE: u32 = 44_100;

/// Number of *per-channel* samples emitted per
/// [`Decoder::receive_frame`] call. 1024 / 44 100 ≈ 23 ms — small
/// enough for low playback latency, big enough that the per-call
/// overhead is dwarfed by the inner mix loop. Also the length of the
/// decoder's two scratch planes.
pub const FRAME_SAMPLES: usize = 1024;

/// Channel count of the PCM output bus. Stereo. Same fixed assumption
/// as [`OUTPUT_SAMPLE_RATE`]; advertised as the codec's maximum channel
/// count at registration time.
pub const OUTPUT_CHANNELS: u16 = 2;
133
134/// Register the MIDI codec. Round-3 produces interleaved S16 stereo
135/// PCM at [`OUTPUT_SAMPLE_RATE`] — the registry-built decoder uses the
136/// pure-tone fallback because we don't yet have a bank-discovery hook
137/// in the factory signature. Callers who want SoundFont 2 playback
138/// should build the decoder by hand via [`MidiDecoder::with_instrument`].
139pub fn register_codecs(reg: &mut CodecRegistry) {
140    let caps = CodecCapabilities::audio("midi_synth")
141        .with_lossy(false)
142        .with_lossless(true)
143        .with_intra_only(false)
144        .with_max_channels(OUTPUT_CHANNELS);
145    reg.register(
146        CodecInfo::new(CodecId::new(CODEC_ID_STR))
147            .capabilities(caps)
148            .decoder(make_decoder),
149    );
150}
151
152fn make_decoder(_params: &CodecParameters) -> Result<Box<dyn Decoder>> {
153    Ok(Box::new(MidiDecoder::new(
154        Arc::new(ToneInstrument::new()),
155        OUTPUT_SAMPLE_RATE,
156    )))
157}
158
/// Soft-synth decoder: SMF in, interleaved S16 stereo PCM out.
///
/// Stateful — accepts exactly one SMF blob via [`send_packet`] and then
/// streams audio frames out of [`receive_frame`] until both the event
/// scheduler and the voice pool have run dry (or the tail-chunk budget
/// is exhausted), at which point [`Error::Eof`] is returned. Calling
/// `send_packet` again replaces the scheduler with a fresh one
/// (re-priming for a new file).
///
/// State that survives across `receive_frame` calls:
///   * the merged event list + cursor + sample clock (in [`Scheduler`])
///   * the voice pool + per-channel CC state (in [`Mixer`])
///   * a small carry-over flag that lets the decoder render a few
///     extra trailing chunks after the last event so release tails
///     don't get cut off mid-envelope.
///
/// [`send_packet`]: Decoder::send_packet
/// [`receive_frame`]: Decoder::receive_frame
pub struct MidiDecoder {
    /// Codec id handed back by `Decoder::codec_id`.
    codec_id: CodecId,
    /// Instrument passed to the scheduler on every `step` call.
    instrument: Arc<dyn Instrument>,
    /// Output sample rate; forwarded to each new [`Scheduler`].
    sample_rate: u32,
    /// `None` until the first `send_packet` arrives.
    scheduler: Option<Scheduler>,
    /// Mixer the scheduler dispatches into and the decoder mixes down
    /// from; kept across packets.
    mixer: Mixer,
    /// Scratch stereo planes — reused across `receive_frame` calls so
    /// we don't reallocate on every chunk.
    left: Vec<f32>,
    right: Vec<f32>,
    /// Sample PTS of the next emitted frame (in `1/sample_rate` units).
    next_pts: i64,
    /// Set once the scheduler has run dry; we keep emitting frames
    /// until the voice pool falls silent too.
    drained: bool,
    /// Set when the final frame is handed out — every subsequent
    /// `receive_frame` call returns `Error::Eof`.
    finished: bool,
    /// Bound on extra "tail" chunks emitted after the scheduler is done
    /// but voices may still be releasing. Worst-case the longest
    /// release in [`Sf2Voice`](instruments::sf2::Sf2Voice) is 50 ms = 3
    /// chunks at 1024 samples / 44.1 kHz; tone voices are 100 ms = 5
    /// chunks. Bound generously at 32 to also cover a long looping
    /// sample whose release window is unusually long.
    tail_chunks_remaining: usize,
}
203
204impl MidiDecoder {
205    /// Hard cap on how many extra audio chunks we'll emit after the
206    /// last SMF event has fired. Voice release tails (50–100 ms with
207    /// the round-2/3 envelopes) live inside this budget; without it,
208    /// a malformed or never-releasing voice could keep the decoder
209    /// emitting forever.
210    pub const TAIL_CHUNK_CAP: usize = 32;
211
212    /// Build a decoder bound to a specific instrument and sample rate.
213    /// Use this directly when you have a SoundFont 2 bank loaded and
214    /// want to drive the synth with it; the [`make_decoder`] factory
215    /// (called by the codec registry) builds one with the pure-tone
216    /// fallback because there's no instrument-discovery plumbing in
217    /// the factory signature yet.
218    pub fn new(instrument: Arc<dyn Instrument>, sample_rate: u32) -> Self {
219        Self {
220            codec_id: CodecId::new(CODEC_ID_STR),
221            instrument,
222            sample_rate,
223            scheduler: None,
224            mixer: Mixer::new(),
225            left: vec![0.0; FRAME_SAMPLES],
226            right: vec![0.0; FRAME_SAMPLES],
227            next_pts: 0,
228            drained: false,
229            finished: false,
230            tail_chunks_remaining: Self::TAIL_CHUNK_CAP,
231        }
232    }
233
234    /// Convenience constructor: same as [`new`](Self::new) but takes a
235    /// concrete [`Instrument`] by value and wraps it in an `Arc`.
236    pub fn with_instrument(instrument: Arc<dyn Instrument>) -> Self {
237        Self::new(instrument, OUTPUT_SAMPLE_RATE)
238    }
239
240    /// Build a decoder bound to an instrument loaded from a path on
241    /// disk. The format is dispatched by [`InstrumentSource`] so the
242    /// caller picks SFZ / SF2 / DLS explicitly (file extensions are
243    /// not always reliable indicators).
244    pub fn with_instrument_source(source: InstrumentSource) -> Result<Self> {
245        let inst = source.load()?;
246        Ok(Self::new(inst, OUTPUT_SAMPLE_RATE))
247    }
248}
249
/// Source descriptor for an external instrument bank. Paired with
/// [`MidiDecoder::with_instrument_source`] so a caller that only knows
/// "I have an SF2 file at this path" doesn't have to type the full
/// `Arc::new(Sf2Instrument::open(...))` chain.
///
/// Two sources compare equal when they name the same format and the
/// same path; no filesystem access or path normalisation is involved.
/// The type is also hashable, so it can serve as a cache / map key.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum InstrumentSource {
    /// SoundFont 2 bank — loaded via [`Sf2Instrument::open`].
    Sf2(PathBuf),
    /// SFZ text patch — loaded via [`SfzInstrument::open`] (samples
    /// are read off disk relative to the patch's directory).
    Sfz(PathBuf),
    /// DLS Level 1 / 2 bank — loaded via [`DlsInstrument::open`].
    Dls(PathBuf),
    /// Pure-tone fallback (no on-disk file).
    Tone,
}
266
267impl InstrumentSource {
268    /// Load the bank at the named path and wrap it in an `Arc<dyn
269    /// Instrument>`. Returns the underlying error (file-not-found,
270    /// invalid magic, malformed RIFF chunk, etc.) on failure.
271    pub fn load(self) -> Result<Arc<dyn Instrument>> {
272        match self {
273            InstrumentSource::Sf2(p) => Ok(Arc::new(Sf2Instrument::open(&p)?)),
274            InstrumentSource::Sfz(p) => Ok(Arc::new(SfzInstrument::open(&p)?)),
275            InstrumentSource::Dls(p) => Ok(Arc::new(DlsInstrument::open(&p)?)),
276            InstrumentSource::Tone => Ok(Arc::new(ToneInstrument::new())),
277        }
278    }
279
280    /// Convenience constructors mirroring the enum variants. Each
281    /// takes anything `Path`-convertible.
282    pub fn sf2(path: impl AsRef<Path>) -> Self {
283        Self::Sf2(path.as_ref().to_path_buf())
284    }
285    pub fn sfz(path: impl AsRef<Path>) -> Self {
286        Self::Sfz(path.as_ref().to_path_buf())
287    }
288    pub fn dls(path: impl AsRef<Path>) -> Self {
289        Self::Dls(path.as_ref().to_path_buf())
290    }
291}
292
293impl MidiDecoder {
294    /// Sample rate the decoder is rendering at. Equal to whatever was
295    /// passed to [`new`](Self::new) (default [`OUTPUT_SAMPLE_RATE`] when
296    /// constructed via the registry).
297    pub fn sample_rate(&self) -> u32 {
298        self.sample_rate
299    }
300
301    /// Borrow the active scheduler — `None` until the first
302    /// `send_packet`. Useful for diagnostics + tests.
303    pub fn scheduler(&self) -> Option<&Scheduler> {
304        self.scheduler.as_ref()
305    }
306
307    /// Convert the planar stereo `(left, right)` buffers into one
308    /// interleaved S16 [`AudioFrame`].
309    fn build_audio_frame(&mut self) -> Frame {
310        let n = self.left.len();
311        let mut bytes = Vec::with_capacity(n * 2 * 2); // 2 bytes/sample × 2 channels
312        for i in 0..n {
313            let l = (self.left[i].clamp(-1.0, 1.0) * 32_767.0) as i16;
314            let r = (self.right[i].clamp(-1.0, 1.0) * 32_767.0) as i16;
315            bytes.extend_from_slice(&l.to_le_bytes());
316            bytes.extend_from_slice(&r.to_le_bytes());
317        }
318        let pts = Some(self.next_pts);
319        self.next_pts = self.next_pts.saturating_add(n as i64);
320        Frame::Audio(AudioFrame {
321            samples: n as u32,
322            pts,
323            data: vec![bytes],
324        })
325    }
326}
327
328impl Decoder for MidiDecoder {
329    fn codec_id(&self) -> &CodecId {
330        &self.codec_id
331    }
332
333    fn send_packet(&mut self, packet: &Packet) -> Result<()> {
334        // Confirm the packet at least *looks* like an SMF — saves the
335        // user from a "synthesis pending" misdiagnosis when the real
336        // issue is a mis-routed packet.
337        if packet.data.len() < 4 || &packet.data[0..4] != b"MThd" {
338            return Err(Error::invalid(
339                "MIDI: packet does not start with the 'MThd' header chunk",
340            ));
341        }
342        let smf = crate::smf::parse(&packet.data)?;
343        // Prime the scheduler. Dropping the previous one (if any)
344        // discards any partially-played file — callers should call
345        // `flush` first if that matters.
346        self.scheduler = Some(Scheduler::new(&smf, self.sample_rate));
347        self.mixer.all_notes_off();
348        self.next_pts = 0;
349        self.drained = false;
350        self.finished = false;
351        self.tail_chunks_remaining = Self::TAIL_CHUNK_CAP;
352        Ok(())
353    }
354
355    fn receive_frame(&mut self) -> Result<Frame> {
356        if self.finished {
357            return Err(Error::Eof);
358        }
359        let scheduler = self.scheduler.as_mut().ok_or(Error::NeedMore)?;
360
361        // Step the scheduler over one chunk-worth of samples; this may
362        // dispatch any number of events into the mixer. The scheduler
363        // is `drained` either when it just transitioned to done, or
364        // when it was already done coming into this call (we keep
365        // running the mixer-only tail in that case).
366        let was_done = scheduler.is_done();
367        let now_done = scheduler.step(FRAME_SAMPLES, &mut self.mixer, self.instrument.as_ref());
368        if was_done || now_done {
369            self.drained = true;
370        }
371
372        // Mix down whatever the pool currently holds.
373        let active = self.mixer.mix_stereo(&mut self.left, &mut self.right);
374
375        // Termination: scheduler done AND no live voices AND we've
376        // already burned at least one tail chunk. The tail-chunk cap
377        // keeps a never-finishing voice (looping sample with no
378        // release fired) from holding the decoder open forever.
379        if self.drained {
380            if active == 0 || self.tail_chunks_remaining == 0 {
381                self.finished = true;
382                // Still hand back this final chunk (silent or near-silent)
383                // — the caller can decide to discard it. Returning Eof
384                // here would lose any release-tail samples.
385                return Ok(self.build_audio_frame());
386            }
387            self.tail_chunks_remaining = self.tail_chunks_remaining.saturating_sub(1);
388        }
389
390        Ok(self.build_audio_frame())
391    }
392
393    fn flush(&mut self) -> Result<()> {
394        // Mark the scheduler done so subsequent receive_frame calls
395        // run only the release tail.
396        if let Some(s) = self.scheduler.as_mut() {
397            // Drain by stepping a huge amount of samples — every event
398            // will fire, and the cursor will advance to the end. This
399            // is cheaper than re-engineering the scheduler API around
400            // an explicit "skip to end" entry point.
401            s.step(u32::MAX as usize, &mut self.mixer, self.instrument.as_ref());
402        }
403        Ok(())
404    }
405
406    fn reset(&mut self) -> Result<()> {
407        self.scheduler = None;
408        self.mixer.all_notes_off();
409        self.next_pts = 0;
410        self.drained = false;
411        self.finished = false;
412        self.tail_chunks_remaining = Self::TAIL_CHUNK_CAP;
413        Ok(())
414    }
415}
416
417#[cfg(test)]
418mod tests {
419    use super::*;
420    use oxideav_core::TimeBase;
421
422    fn minimal_smf() -> Vec<u8> {
423        // MThd format-0, ntrks=1, division=96; one MTrk with EOT.
424        let mut b = vec![];
425        b.extend_from_slice(b"MThd");
426        b.extend_from_slice(&6u32.to_be_bytes());
427        b.extend_from_slice(&0u16.to_be_bytes());
428        b.extend_from_slice(&1u16.to_be_bytes());
429        b.extend_from_slice(&96u16.to_be_bytes());
430        b.extend_from_slice(b"MTrk");
431        b.extend_from_slice(&4u32.to_be_bytes());
432        b.extend_from_slice(&[0x00, 0xFF, 0x2F, 0x00]);
433        b
434    }
435
436    /// Build a 5-second SMF: tempo, two notes on channel 1, one note on
437    /// channel 10 (drums), a tempo change, and an EOT five seconds in.
438    fn five_second_smf() -> Vec<u8> {
439        // 480 ticks / qn at 120 BPM = 240 ticks / sec. Five seconds =
440        // 1200 ticks. Halfway tempo change (tick 600) to 250 000 us/qn
441        // (240 BPM) ⇒ second half is 240 ticks per second × 2 = 480
442        // ticks/sec — but we wrote 1200 ticks of "music" assuming the
443        // initial tempo so the wall-clock length will be ≈ 3.75 s, not
444        // a pure 5 s. That's fine: the test only asserts "non-silent
445        // PCM with a sensible duration", not exact timing.
446        let mut blob = Vec::new();
447        blob.extend_from_slice(b"MThd");
448        blob.extend_from_slice(&6u32.to_be_bytes());
449        blob.extend_from_slice(&1u16.to_be_bytes()); // format 1
450        blob.extend_from_slice(&3u16.to_be_bytes()); // 3 tracks
451        blob.extend_from_slice(&480u16.to_be_bytes()); // 480 tpqn
452
453        // Track 1: tempo + tempo change + EOT.
454        let mut t1: Vec<u8> = Vec::new();
455        // tick 0 set tempo 500_000 us/qn (= 120 BPM)
456        t1.extend_from_slice(&[0x00, 0xFF, 0x51, 0x03, 0x07, 0xA1, 0x20]);
457        // tick 600 set tempo 250_000 us/qn (= 240 BPM): VLQ(600) = [0x84, 0x58]
458        t1.extend_from_slice(&[0x84, 0x58, 0xFF, 0x51, 0x03, 0x03, 0xD0, 0x90]);
459        // tick 1200 EOT: VLQ(600) again
460        t1.extend_from_slice(&[0x84, 0x58, 0xFF, 0x2F, 0x00]);
461        push_track(&mut blob, &t1);
462
463        // Track 2: two notes on channel 1, played sequentially.
464        let mut t2: Vec<u8> = Vec::new();
465        // tick 0 note on chan 1 key 60 vel 100
466        t2.extend_from_slice(&[0x00, 0x91, 0x3C, 0x64]);
467        // tick 240 note off chan 1 key 60 vel 0; VLQ(240) = [0x81, 0x70]
468        t2.extend_from_slice(&[0x81, 0x70, 0x81, 0x3C, 0x40]);
469        // tick 240 + 0 note on chan 1 key 64 vel 100
470        t2.extend_from_slice(&[0x00, 0x91, 0x40, 0x64]);
471        // tick + 240 note off
472        t2.extend_from_slice(&[0x81, 0x70, 0x81, 0x40, 0x40]);
473        // tick + 720 EOT (so EOT at tick 1200): VLQ(720) = [0x85, 0x50]
474        t2.extend_from_slice(&[0x85, 0x50, 0xFF, 0x2F, 0x00]);
475        push_track(&mut blob, &t2);
476
477        // Track 3: one drum hit on channel 10 (index 9) — note 36 (kick).
478        let mut t3: Vec<u8> = Vec::new();
479        // tick 0 note on chan 9 key 36 vel 100
480        t3.extend_from_slice(&[0x00, 0x99, 0x24, 0x64]);
481        // tick 480 note off (VLQ 480 = [0x83, 0x60])
482        t3.extend_from_slice(&[0x83, 0x60, 0x89, 0x24, 0x40]);
483        // tick + 720 EOT
484        t3.extend_from_slice(&[0x85, 0x50, 0xFF, 0x2F, 0x00]);
485        push_track(&mut blob, &t3);
486
487        blob
488    }
489
490    fn push_track(blob: &mut Vec<u8>, events: &[u8]) {
491        blob.extend_from_slice(b"MTrk");
492        blob.extend_from_slice(&(events.len() as u32).to_be_bytes());
493        blob.extend_from_slice(events);
494    }
495
496    #[test]
497    fn registers_codec_under_midi_id() {
498        let mut reg = CodecRegistry::new();
499        register_codecs(&mut reg);
500        assert!(reg.has_decoder(&CodecId::new(CODEC_ID_STR)));
501    }
502
503    #[test]
504    fn decoder_rejects_non_smf_packets() {
505        let mut reg = CodecRegistry::new();
506        register_codecs(&mut reg);
507        let params = CodecParameters::audio(CodecId::new(CODEC_ID_STR));
508        let mut dec = reg.first_decoder(&params).unwrap();
509        let pkt = Packet::new(0, TimeBase::new(1, 44_100), b"not midi".to_vec());
510        let err = dec.send_packet(&pkt).unwrap_err();
511        assert!(matches!(err, Error::InvalidData(_)));
512    }
513
514    #[test]
515    fn empty_smf_produces_eof_after_initial_chunks() {
516        let mut reg = CodecRegistry::new();
517        register_codecs(&mut reg);
518        let params = CodecParameters::audio(CodecId::new(CODEC_ID_STR));
519        let mut dec = reg.first_decoder(&params).unwrap();
520        let pkt = Packet::new(0, TimeBase::new(1, 44_100), minimal_smf());
521        dec.send_packet(&pkt).unwrap();
522        // The empty-file SMF (one EOT, nothing else) drains immediately.
523        // We should get one final near-silent chunk and then Eof.
524        let _ = dec.receive_frame().expect("initial chunk");
525        // Subsequent calls return Eof.
526        let mut got_eof = false;
527        for _ in 0..4 {
528            match dec.receive_frame() {
529                Err(Error::Eof) => {
530                    got_eof = true;
531                    break;
532                }
533                Ok(_) => continue,
534                Err(other) => panic!("unexpected error {other:?}"),
535            }
536        }
537        assert!(got_eof, "decoder should drain to Eof on an empty SMF");
538    }
539
540    /// End-to-end: 5-second SMF with notes on channels 1 and 10 + a
541    /// tempo change → drives audio out via the tone fallback. Asserts
542    /// frame layout, non-silence, and a sensible peak amplitude.
543    #[test]
544    fn end_to_end_five_second_smf_produces_pcm() {
545        let mut dec = MidiDecoder::new(Arc::new(ToneInstrument::new()), OUTPUT_SAMPLE_RATE);
546        let blob = five_second_smf();
547        let pkt = Packet::new(0, TimeBase::new(1, 44_100), blob);
548        dec.send_packet(&pkt).unwrap();
549
550        let mut all_samples: Vec<i16> = Vec::new();
551        let mut frame_count = 0;
552        // Bounded loop — 44_100 * 6 / 1024 ≈ 258 chunks for 6 seconds
553        // of audio. Cap at 1024 so a misbehaving decoder can't hang.
554        for _ in 0..1024 {
555            match dec.receive_frame() {
556                Ok(Frame::Audio(af)) => {
557                    assert_eq!(af.samples, FRAME_SAMPLES as u32);
558                    assert_eq!(af.data.len(), 1, "interleaved S16 = single plane");
559                    let bytes = &af.data[0];
560                    assert_eq!(bytes.len(), FRAME_SAMPLES * 4, "stereo S16 = 4 bytes/frame");
561                    for chunk in bytes.chunks_exact(2) {
562                        all_samples.push(i16::from_le_bytes([chunk[0], chunk[1]]));
563                    }
564                    frame_count += 1;
565                }
566                Ok(_) => panic!("expected Audio frame"),
567                Err(Error::Eof) => break,
568                Err(other) => panic!("unexpected error: {other:?}"),
569            }
570        }
571
572        // We rendered both channels interleaved — divide by 2 to get
573        // per-channel sample count.
574        let per_channel = all_samples.len() / 2;
575        // The fixture runs through ~1200 ticks at a per-tick rate that
576        // halves halfway through (120 → 240 BPM tempo change).
577        //
578        //   first half : 600 ticks * 45.9375 samples/tick =  27 562 samples
579        //   second half: 600 ticks * 22.96875 samples/tick = 13 781 samples
580        //                                                    ─────────────
581        //   total music: ~41 344 samples (= ~0.94 s wall-clock)
582        //
583        // The release tails on the (already-done) tone voices are
584        // contained in this window. Lower bound: ≥ 30 000 samples
585        // (~680 ms) so a regression that emits a single chunk and
586        // quits is caught.
587        assert!(
588            per_channel >= 30_000,
589            "expected ≥ 30 k samples (~0.7 s) of audio, got {} samples / channel ({} frames)",
590            per_channel,
591            frame_count,
592        );
593
594        // Non-silence check: at least 5 % of samples must be non-zero.
595        let nonzero = all_samples.iter().filter(|s| s.abs() > 16).count();
596        let nonzero_ratio = nonzero as f64 / all_samples.len() as f64;
597        assert!(
598            nonzero_ratio > 0.05,
599            "audio is mostly silent: {:.2}% non-zero",
600            nonzero_ratio * 100.0,
601        );
602
603        // Peak amplitude check: must be audible (>= 1 % of i16 range)
604        // but must not have clipped (the fallback's headroom keeps it
605        // well under 0 dBFS).
606        let peak = all_samples
607            .iter()
608            .map(|s| s.unsigned_abs())
609            .max()
610            .unwrap_or(0);
611        assert!(
612            peak > 327,
613            "peak {} too quiet — synth is producing near-silent output",
614            peak,
615        );
616        assert!(
617            peak < 32_767,
618            "peak {} indicates clipping — mix bus should have headroom",
619            peak,
620        );
621    }
622
623    /// End-to-end with the round-2 SF2 fixture (a 20-frame sample-rate
624    /// 22 050 Hz looping ramp at root key 60). Exercises the full path
625    /// SMF → scheduler → SF2 voice generator → mixer → PCM.
626    #[test]
627    fn end_to_end_with_sf2_fixture() {
628        use crate::instruments::sf2::Sf2Instrument;
629        let blob = build_looping_sf2_fixture();
630        let inst = Sf2Instrument::from_bytes("fixture", &blob).expect("parse fixture");
631        let mut dec = MidiDecoder::new(Arc::new(inst), OUTPUT_SAMPLE_RATE);
632        let smf = five_second_smf();
633        let pkt = Packet::new(0, TimeBase::new(1, 44_100), smf);
634        dec.send_packet(&pkt).unwrap();
635
636        let mut all_samples: Vec<i16> = Vec::new();
637        for _ in 0..1024 {
638            match dec.receive_frame() {
639                Ok(Frame::Audio(af)) => {
640                    for chunk in af.data[0].chunks_exact(2) {
641                        all_samples.push(i16::from_le_bytes([chunk[0], chunk[1]]));
642                    }
643                }
644                Err(Error::Eof) => break,
645                Ok(_) => panic!("expected Audio frame"),
646                Err(other) => panic!("error: {other:?}"),
647            }
648        }
649        // Same per-channel lower bound as the tone-fallback test —
650        // music is ~0.94 s wall-clock.
651        assert!(
652            all_samples.len() / 2 >= 30_000,
653            "expected ≥ 30 k samples / channel, got {}",
654            all_samples.len() / 2,
655        );
656        let nonzero = all_samples.iter().filter(|s| s.abs() > 16).count();
657        assert!(
658            nonzero > all_samples.len() / 20,
659            "expected ≥ 5 % non-silent samples, got {} / {}",
660            nonzero,
661            all_samples.len(),
662        );
663        let peak = all_samples
664            .iter()
665            .map(|s| s.unsigned_abs())
666            .max()
667            .unwrap_or(0);
668        assert!(peak > 327, "SF2 fixture rendered too quiet (peak {peak})");
669    }
670
671    /// Build the same minimal looping SF2 the round-2 voice tests use:
672    /// one preset (program 0, bank 0), one instrument, one mono sample
673    /// — a 20-frame ramp at 22 050 Hz with `sampleModes=1` so the
674    /// voice keeps producing audio for the whole MIDI note duration.
675    /// Inlined here (rather than re-exported from `instruments::sf2`)
676    /// so the lib-level test stays self-contained.
677    fn build_looping_sf2_fixture() -> Vec<u8> {
678        // 20-frame ramp climbing from -8000 to +8000 in i16.
679        let mut smpl_bytes = Vec::with_capacity(40);
680        for i in 0i32..20 {
681            let v = (i * 800 - 8000) as i16;
682            smpl_bytes.extend_from_slice(&v.to_le_bytes());
683        }
684
685        // INFO list.
686        let mut info = Vec::new();
687        push_riff(&mut info, b"ifil", &[0x02, 0x00, 0x04, 0x00]); // 2.4
688        push_riff(&mut info, b"INAM", b"MidiTestBank\0");
689        let mut info_list = Vec::from(b"INFO" as &[u8]);
690        info_list.extend_from_slice(&info);
691
692        // sdta list.
693        let mut sdta = Vec::new();
694        push_riff(&mut sdta, b"smpl", &smpl_bytes);
695        let mut sdta_list = Vec::from(b"sdta" as &[u8]);
696        sdta_list.extend_from_slice(&sdta);
697
698        // pdta list. Generators: sampleModes=54, sampleID=53, instrument=41.
699        const GEN_SAMPLE_MODES: u16 = 54;
700        const GEN_SAMPLE_ID: u16 = 53;
701        const GEN_INSTRUMENT: u16 = 41;
702        let phdr = concat_records(&[
703            phdr_record("Test Preset", 0, 0, 0),
704            phdr_record("EOP", 0, 0, 1),
705        ]);
706        let pbag = concat_records(&[bag_record(0, 0), bag_record(1, 0)]);
707        let pmod = vec![0u8; 10];
708        let pgen = concat_records(&[gen_record(GEN_INSTRUMENT, 0), gen_record(0, 0)]);
709        let inst = concat_records(&[inst_record("Test Inst", 0), inst_record("EOI", 2)]);
710        let ibag = concat_records(&[bag_record(0, 0), bag_record(2, 0)]);
711        let imod = vec![0u8; 10];
712        let igen = concat_records(&[
713            gen_record(GEN_SAMPLE_MODES, 1),
714            gen_record(GEN_SAMPLE_ID, 0),
715            gen_record(0, 0),
716        ]);
717        let shdr = concat_records(&[
718            shdr_record("RampLoop", 0, 20, 5, 15, 22_050, 60, 0, 0, 1),
719            shdr_record("EOS", 0, 0, 0, 0, 0, 0, 0, 0, 0),
720        ]);
721
722        let mut pdta = Vec::new();
723        push_riff(&mut pdta, b"phdr", &phdr);
724        push_riff(&mut pdta, b"pbag", &pbag);
725        push_riff(&mut pdta, b"pmod", &pmod);
726        push_riff(&mut pdta, b"pgen", &pgen);
727        push_riff(&mut pdta, b"inst", &inst);
728        push_riff(&mut pdta, b"ibag", &ibag);
729        push_riff(&mut pdta, b"imod", &imod);
730        push_riff(&mut pdta, b"igen", &igen);
731        push_riff(&mut pdta, b"shdr", &shdr);
732        let mut pdta_list = Vec::from(b"pdta" as &[u8]);
733        pdta_list.extend_from_slice(&pdta);
734
735        // Outer RIFF/sfbk wrapper.
736        let mut body = Vec::from(b"sfbk" as &[u8]);
737        push_riff(&mut body, b"LIST", &info_list);
738        push_riff(&mut body, b"LIST", &sdta_list);
739        push_riff(&mut body, b"LIST", &pdta_list);
740        let mut out = Vec::from(b"RIFF" as &[u8]);
741        out.extend_from_slice(&(body.len() as u32).to_le_bytes());
742        out.extend_from_slice(&body);
743        out
744    }
745
746    fn push_riff(out: &mut Vec<u8>, tag: &[u8; 4], payload: &[u8]) {
747        out.extend_from_slice(tag);
748        out.extend_from_slice(&(payload.len() as u32).to_le_bytes());
749        out.extend_from_slice(payload);
750        if payload.len() % 2 == 1 {
751            out.push(0);
752        }
753    }
754
755    fn concat_records(rs: &[Vec<u8>]) -> Vec<u8> {
756        let mut out = Vec::new();
757        for r in rs {
758            out.extend_from_slice(r);
759        }
760        out
761    }
762
763    fn name20(s: &str) -> [u8; 20] {
764        let mut buf = [0u8; 20];
765        let bytes = s.as_bytes();
766        let n = bytes.len().min(19);
767        buf[..n].copy_from_slice(&bytes[..n]);
768        buf
769    }
770
771    fn phdr_record(name: &str, program: u16, bank: u16, pbag_start: u16) -> Vec<u8> {
772        let mut r = vec![0u8; 38];
773        r[0..20].copy_from_slice(&name20(name));
774        r[20..22].copy_from_slice(&program.to_le_bytes());
775        r[22..24].copy_from_slice(&bank.to_le_bytes());
776        r[24..26].copy_from_slice(&pbag_start.to_le_bytes());
777        r
778    }
779
780    fn inst_record(name: &str, ibag_start: u16) -> Vec<u8> {
781        let mut r = vec![0u8; 22];
782        r[0..20].copy_from_slice(&name20(name));
783        r[20..22].copy_from_slice(&ibag_start.to_le_bytes());
784        r
785    }
786
787    fn bag_record(gen_start: u16, mod_start: u16) -> Vec<u8> {
788        let mut r = vec![0u8; 4];
789        r[0..2].copy_from_slice(&gen_start.to_le_bytes());
790        r[2..4].copy_from_slice(&mod_start.to_le_bytes());
791        r
792    }
793
794    fn gen_record(oper: u16, amount: u16) -> Vec<u8> {
795        let mut r = vec![0u8; 4];
796        r[0..2].copy_from_slice(&oper.to_le_bytes());
797        r[2..4].copy_from_slice(&amount.to_le_bytes());
798        r
799    }
800
801    #[allow(clippy::too_many_arguments)]
802    fn shdr_record(
803        name: &str,
804        start: u32,
805        end: u32,
806        start_loop: u32,
807        end_loop: u32,
808        sample_rate: u32,
809        original_key: u8,
810        pitch_correction: i8,
811        sample_link: u16,
812        sample_type: u16,
813    ) -> Vec<u8> {
814        let mut r = vec![0u8; 46];
815        r[0..20].copy_from_slice(&name20(name));
816        r[20..24].copy_from_slice(&start.to_le_bytes());
817        r[24..28].copy_from_slice(&end.to_le_bytes());
818        r[28..32].copy_from_slice(&start_loop.to_le_bytes());
819        r[32..36].copy_from_slice(&end_loop.to_le_bytes());
820        r[36..40].copy_from_slice(&sample_rate.to_le_bytes());
821        r[40] = original_key;
822        r[41] = pitch_correction as u8;
823        r[42..44].copy_from_slice(&sample_link.to_le_bytes());
824        r[44..46].copy_from_slice(&sample_type.to_le_bytes());
825        r
826    }
827
828    /// End-to-end SMF with a pitch-bend event mid-note: feed the
829    /// decoder, check that the channel state's pitch bend changed by
830    /// the time the bend tick has fired.
831    #[test]
832    fn end_to_end_pitch_bend_event() {
833        let mut dec = MidiDecoder::new(Arc::new(ToneInstrument::new()), OUTPUT_SAMPLE_RATE);
834        let blob = pitch_bend_smf();
835        let pkt = Packet::new(0, TimeBase::new(1, 44_100), blob);
836        dec.send_packet(&pkt).unwrap();
837        // Pull frames until the scheduler has dispatched everything,
838        // including the pitch bend (located at tick 480, ≈ 23 k samples
839        // = ~22 chunks of 1024).
840        for _ in 0..64 {
841            match dec.receive_frame() {
842                Ok(_) => {}
843                Err(Error::Eof) => break,
844                Err(e) => panic!("unexpected: {e:?}"),
845            }
846        }
847        // Inspect the scheduler — the bend should have been applied.
848        // We can't poke the mixer directly through the decoder API; the
849        // test relies on the scheduler having walked past the event.
850        let s = dec.scheduler().unwrap();
851        assert!(s.is_done(), "scheduler should have drained the bend");
852    }
853
854    /// SMF with: tempo, note-on at tick 0, pitch-bend max-up at tick
855    /// 480, note-off at tick 960, EOT at tick 1200.
856    fn pitch_bend_smf() -> Vec<u8> {
857        let mut blob = Vec::new();
858        blob.extend_from_slice(b"MThd");
859        blob.extend_from_slice(&6u32.to_be_bytes());
860        blob.extend_from_slice(&0u16.to_be_bytes());
861        blob.extend_from_slice(&1u16.to_be_bytes());
862        blob.extend_from_slice(&480u16.to_be_bytes());
863
864        let mut t: Vec<u8> = Vec::new();
865        // tick 0 set tempo 500_000 us/qn (= 120 BPM).
866        t.extend_from_slice(&[0x00, 0xFF, 0x51, 0x03, 0x07, 0xA1, 0x20]);
867        // tick 0 note on chan 0 key 60 vel 100.
868        t.extend_from_slice(&[0x00, 0x90, 0x3C, 0x64]);
869        // tick 480 pitch bend max-up. VLQ(480) = 83 60.
870        t.extend_from_slice(&[0x83, 0x60, 0xE0, 0x7F, 0x7F]);
871        // tick 480 → tick 960: note-off. VLQ(480) = 83 60.
872        t.extend_from_slice(&[0x83, 0x60, 0x80, 0x3C, 0x40]);
873        // tick + 240 EOT. VLQ(240) = 81 70.
874        t.extend_from_slice(&[0x81, 0x70, 0xFF, 0x2F, 0x00]);
875        push_track(&mut blob, &t);
876        blob
877    }
878
879    /// End-to-end SMF with a channel-aftertouch event mid-note: assert
880    /// the decoder doesn't crash and audio still gets produced.
881    #[test]
882    fn end_to_end_channel_aftertouch_event() {
883        let mut dec = MidiDecoder::new(Arc::new(ToneInstrument::new()), OUTPUT_SAMPLE_RATE);
884        let blob = aftertouch_smf();
885        let pkt = Packet::new(0, TimeBase::new(1, 44_100), blob);
886        dec.send_packet(&pkt).unwrap();
887        let mut samples: Vec<i16> = Vec::new();
888        for _ in 0..64 {
889            match dec.receive_frame() {
890                Ok(Frame::Audio(af)) => {
891                    for chunk in af.data[0].chunks_exact(2) {
892                        samples.push(i16::from_le_bytes([chunk[0], chunk[1]]));
893                    }
894                }
895                Err(Error::Eof) => break,
896                Ok(_) => panic!("expected audio"),
897                Err(e) => panic!("unexpected: {e:?}"),
898            }
899        }
900        // We rendered audio.
901        assert!(!samples.is_empty(), "no audio rendered");
902        let nonzero = samples.iter().filter(|s| s.abs() > 16).count();
903        assert!(
904            nonzero > samples.len() / 20,
905            "expected ≥ 5 % non-silent: {} / {}",
906            nonzero,
907            samples.len(),
908        );
909    }
910
911    /// SMF with: tempo, note-on at tick 0, channel pressure at tick 240,
912    /// note-off at tick 480, EOT at tick 720.
913    fn aftertouch_smf() -> Vec<u8> {
914        let mut blob = Vec::new();
915        blob.extend_from_slice(b"MThd");
916        blob.extend_from_slice(&6u32.to_be_bytes());
917        blob.extend_from_slice(&0u16.to_be_bytes());
918        blob.extend_from_slice(&1u16.to_be_bytes());
919        blob.extend_from_slice(&480u16.to_be_bytes());
920
921        let mut t: Vec<u8> = Vec::new();
922        t.extend_from_slice(&[0x00, 0xFF, 0x51, 0x03, 0x07, 0xA1, 0x20]);
923        t.extend_from_slice(&[0x00, 0x90, 0x3C, 0x64]);
924        // VLQ(240) = 81 70. Channel pressure D0 with value 0x60.
925        t.extend_from_slice(&[0x81, 0x70, 0xD0, 0x60]);
926        // VLQ(240): note off.
927        t.extend_from_slice(&[0x81, 0x70, 0x80, 0x3C, 0x40]);
928        // VLQ(240): EOT.
929        t.extend_from_slice(&[0x81, 0x70, 0xFF, 0x2F, 0x00]);
930        push_track(&mut blob, &t);
931        blob
932    }
933
934    #[test]
935    fn reset_clears_scheduler_and_voices() {
936        let mut dec = MidiDecoder::new(Arc::new(ToneInstrument::new()), OUTPUT_SAMPLE_RATE);
937        let pkt = Packet::new(0, TimeBase::new(1, 44_100), five_second_smf());
938        dec.send_packet(&pkt).unwrap();
939        let _ = dec.receive_frame().unwrap();
940        dec.reset().unwrap();
941        // After reset, receive_frame returns NeedMore (no scheduler).
942        match dec.receive_frame() {
943            Err(Error::NeedMore) => {}
944            other => panic!("expected NeedMore after reset, got {other:?}"),
945        }
946    }
947}
oxideav_midi/lib.rs

oxideav_midi/
lib.rs