oxideav_midi/lib.rs
1//! MIDI — Standard MIDI File (SMF) parser + transport metadata + soft-synth.
2//!
3//! * **[`smf`]** — pure-Rust parser for the Standard MIDI File format
4//! (Type 0 / 1 / 2). Header (`MThd`) + tracks (`MTrk`) + every common
5//! channel-voice message, sysex (`F0` / `F7`), and meta event
6//! (tempo, time signature, key signature, text, marker, end-of-track,
7//! SMPTE offset, sequencer-specific). Running status is honoured;
8//! VLQs are bounded to 4 bytes per spec; chunk lengths are validated
9//! against remaining bytes; total events per file are capped at
10//! [`smf::MAX_EVENTS_PER_FILE`].
11//! * **[`paths`]** — per-OS SoundFont/SFZ/DLS search paths plus the
12//! `OXIDEAV_SOUNDFONT_PATH` environment override. `find_soundfonts`
13//! walks them and returns every instrument-bank file present.
14//! * **[`instruments`]** — [`instruments::Instrument`] trait. Three
15//! adapters:
16//! * **[`instruments::sf2`]** — full SoundFont 2 RIFF reader +
17//! voice generator. Walks the `sfbk` form, cross-resolves the
18//! preset → instrument → zone → sample chain, and renders
19//! sm24-aware 24-bit PCM at the requested pitch via linear
20//! interpolation. Honours the volume + modulation DAHDSR
21//! envelopes, the initial low-pass biquad filter, mod-env →
22//! pitch / filter routing, exclusive-class drum cuts, and
23//! native stereo zones.
24//! * **[`instruments::sfz`]** — text patch reader **plus voice
25//! generator**. Strips comments, walks `<control>` /
26//! `<global>` / `<master>` / `<group>` / `<region>` sections,
27//! flattens inheritance into one fully-resolved opcode map per
28//! region, and (via
29//! [`SfzInstrument::open`](instruments::sfz::SfzInstrument::open))
30//! reads every referenced sample off disk. Voice generation
31//! decodes the WAV sample bytes, picks the matching region by
32//! (key, velocity), shifts pitch off `pitch_keycenter` + `tune` +
33//! `transpose`, and runs a DAHDSR amplitude envelope
34//! (`ampeg_*`) + vibrato LFO (`lfo01_*`).
35//! * **[`instruments::dls`]** — DLS (Downloadable Sounds)
36//! Level 1 + Level 2 RIFF reader **plus voice generator**.
37//! Walks the `DLS ` form, parses the `colh` / `vers` / `ptbl`
38//! pool table / `lins-list` instrument table / `wvpl-list`
39//! wave pool, and surfaces a fully-cross-resolved
40//! [`DlsBank`](instruments::dls::DlsBank) of instruments →
41//! regions → wave-pool samples with their `wsmp` loops,
42//! `wlnk` cue references, and `art1` / `art2` articulation
43//! connection blocks. `make_voice` resolves the wlnk → ptbl →
44//! wave-pool entry, decodes the PCM, and plays the sample
45//! through [`SamplePlayer`](instruments::sample_voice::SamplePlayer).
46//! `art1`/`art2` connection-block evaluation is round 2.
47//! * **[`instruments::sample_voice`]** — shared sample-playback
48//! voice (mono in, mono out) used by both SFZ and DLS. Covers
49//! DAHDSR amplitude envelope, four loop modes, pitch bend,
50//! and a vibrato LFO.
51//! * **[`instruments::wav_pcm`]** — minimal RIFF/WAVE PCM
52//! decoder used by the SFZ and DLS sample loaders.
53//! * **[`instruments::tone`]** — sine/triangle/saw/square
54//! fallback so the synth produces *something* even when no
55//! on-disk bank is present.
56//! * **[`mixer`]** — polyphonic voice pool (32 voices) with stereo
57//! mixdown, per-channel volume / pan / sustain pedal handling, and
58//! oldest-voice preemption when the pool is full. Round 75 adds the
59//! full RPN 1 / RPN 2 / RPN 5 control surface (channel fine + coarse
60//! tune + modulation-depth range), CC 1 (mod wheel) → per-voice
61//! depth, CC 74 (MPE "third dimension") → per-voice timbre, the
62//! `MpeZone` / `MpeRole` topology built from MCM messages, and
63//! universal-SysEx-driven master volume / master fine / master
64//! coarse tuning that sum with per-channel tuning into the
65//! effective pitch each voice receives.
66//! * **[`scheduler`]** — SMF event scheduler. Merges every track into a
67//! single time-ordered stream, converts ticks → samples against the
68//! current tempo + division, and dispatches each event into the
69//! mixer at the right audio sample. Round 75 wires the Universal
70//! Real-Time / Non-Real-Time SysEx routing: GM 1 / GM 2 / GM Off
71//! reset, CA-25 Master Fine / Master Coarse Tuning, Master Volume,
72//! plus the CC 1 / CC 74 / MPE-MCM channel-CC paths.
73//! * **[`downloader`]** — stub that names a planned default bank
74//! (TimGM6mb) but currently returns [`Error::Unsupported`].
75//!
76//! The decoder factory ([`make_decoder`]) is registered under codec id
77//! [`CODEC_ID_STR`] = `"midi"`. Round-3 wires SMF events end-to-end:
78//! `send_packet` parses the SMF and primes the scheduler; `receive_frame`
79//! pulls one chunk of stereo PCM ([`FRAME_SAMPLES`] samples per channel
80//! at [`OUTPUT_SAMPLE_RATE`]) until both the event stream and the voice
81//! pool have run dry, then returns [`Error::Eof`].
82//!
//! Without an instrument bank the decoder uses
//! [`instruments::tone::ToneInstrument`] — the pure-tone fallback —
//! so a `.mid` file plays back as audible-but-not-musical sine /
//! triangle / square waves. To use a real bank, build the decoder by
//! hand: either wrap an
//! [`Sf2Instrument`](instruments::sf2::Sf2Instrument) in an `Arc` and
//! pass it to [`MidiDecoder::with_instrument`], or describe the bank
//! with an [`InstrumentSource`] and call
//! [`MidiDecoder::with_instrument_source`]. The decoder factory wired
//! into the registry today does not yet plumb a bank-discovery hook.
90
91pub mod downloader;
92pub mod instruments;
93pub mod mixer;
94pub mod paths;
95pub mod scheduler;
96pub mod smf;
97pub mod tuning;
98
99use std::path::{Path, PathBuf};
100use std::sync::Arc;
101
102use oxideav_core::{
103 AudioFrame, CodecCapabilities, CodecId, CodecInfo, CodecParameters, CodecRegistry, Decoder,
104 Error, Frame, Packet, Result,
105};
106
107use crate::instruments::dls::DlsInstrument;
108use crate::instruments::sf2::Sf2Instrument;
109use crate::instruments::sfz::SfzInstrument;
110use crate::instruments::tone::ToneInstrument;
111use crate::instruments::Instrument;
112use crate::mixer::Mixer;
113use crate::scheduler::Scheduler;
114
/// Public codec id string. Matches the aggregator feature name `midi`.
pub const CODEC_ID_STR: &str = "midi";

/// Default audio output sample rate: 44 100 Hz. The registry factory,
/// [`MidiDecoder::with_instrument`] and
/// [`MidiDecoder::with_instrument_source`] always render at this rate,
/// so the decoder doesn't need a parameter from the caller (the SMF
/// container itself doesn't carry one); [`MidiDecoder::new`] accepts an
/// explicit rate instead. Round-4 may wire this through
/// `CodecParameters::sample_rate`.
pub const OUTPUT_SAMPLE_RATE: u32 = 44_100;

/// Number of *per-channel* samples emitted per
/// [`Decoder::receive_frame`] call, independent of the sample rate the
/// decoder was built with. ~23 ms at 44.1 kHz — small enough for low
/// playback latency, big enough that the per-call overhead is dwarfed
/// by the inner mix loop.
pub const FRAME_SAMPLES: usize = 1024;

/// Channel count of the PCM output bus. Stereo. Same fixed assumption
/// as [`OUTPUT_SAMPLE_RATE`].
pub const OUTPUT_CHANNELS: u16 = 2;
133
134/// Register the MIDI codec. Round-3 produces interleaved S16 stereo
135/// PCM at [`OUTPUT_SAMPLE_RATE`] — the registry-built decoder uses the
136/// pure-tone fallback because we don't yet have a bank-discovery hook
137/// in the factory signature. Callers who want SoundFont 2 playback
138/// should build the decoder by hand via [`MidiDecoder::with_instrument`].
139pub fn register_codecs(reg: &mut CodecRegistry) {
140 let caps = CodecCapabilities::audio("midi_synth")
141 .with_lossy(false)
142 .with_lossless(true)
143 .with_intra_only(false)
144 .with_max_channels(OUTPUT_CHANNELS);
145 reg.register(
146 CodecInfo::new(CodecId::new(CODEC_ID_STR))
147 .capabilities(caps)
148 .decoder(make_decoder),
149 );
150}
151
152fn make_decoder(_params: &CodecParameters) -> Result<Box<dyn Decoder>> {
153 Ok(Box::new(MidiDecoder::new(
154 Arc::new(ToneInstrument::new()),
155 OUTPUT_SAMPLE_RATE,
156 )))
157}
158
/// Soft-synth decoder: SMF in, interleaved S16 stereo PCM out.
///
/// Stateful — accepts exactly one SMF blob via [`send_packet`] and then
/// streams audio frames out of [`receive_frame`] until both the event
/// scheduler and the voice pool have run dry, at which point
/// [`Error::Eof`] is returned. Calling `send_packet` again replaces the
/// scheduler with a fresh one (re-priming for a new file).
///
/// State that survives across `receive_frame` calls:
/// * the merged event list + cursor + sample clock (in [`Scheduler`])
/// * the voice pool + per-channel CC state (in [`Mixer`])
/// * the `drained` / `finished` flags plus a bounded tail-chunk
///   counter, which together let the decoder keep rendering after the
///   last event so release tails don't get cut off mid-envelope.
///
/// [`send_packet`]: Decoder::send_packet
/// [`receive_frame`]: Decoder::receive_frame
pub struct MidiDecoder {
    /// Codec id handed back by [`Decoder::codec_id`].
    codec_id: CodecId,
    /// Instrument passed to the scheduler on every step.
    instrument: Arc<dyn Instrument>,
    /// Rate handed to each new [`Scheduler`].
    sample_rate: u32,
    /// `None` until the first `send_packet` arrives.
    scheduler: Option<Scheduler>,
    /// Mixer that receives scheduler events and produces the stereo mix.
    mixer: Mixer,
    /// Scratch stereo planes — reused across `receive_frame` calls so
    /// we don't reallocate on every chunk.
    left: Vec<f32>,
    right: Vec<f32>,
    /// Sample PTS of the next emitted frame (in `1/sample_rate` units).
    next_pts: i64,
    /// Set once the scheduler has run dry; we keep emitting frames
    /// until the voice pool falls silent too.
    drained: bool,
    /// Set when the final frame is handed out; every later
    /// `receive_frame` call returns `Error::Eof`.
    finished: bool,
    /// Remaining budget of extra "tail" chunks emitted after the
    /// scheduler is done but voices may still be releasing. Starts at
    /// [`MidiDecoder::TAIL_CHUNK_CAP`]. Worst-case the longest
    /// release in [`Sf2Voice`](instruments::sf2::Sf2Voice) is 50 ms = 3
    /// chunks at 1024 samples / 44.1 kHz; tone voices are 100 ms = 5
    /// chunks. Bound generously at 32 to also cover a long looping
    /// sample whose release window is unusually long.
    tail_chunks_remaining: usize,
}
203
204impl MidiDecoder {
205 /// Hard cap on how many extra audio chunks we'll emit after the
206 /// last SMF event has fired. Voice release tails (50–100 ms with
207 /// the round-2/3 envelopes) live inside this budget; without it,
208 /// a malformed or never-releasing voice could keep the decoder
209 /// emitting forever.
210 pub const TAIL_CHUNK_CAP: usize = 32;
211
212 /// Build a decoder bound to a specific instrument and sample rate.
213 /// Use this directly when you have a SoundFont 2 bank loaded and
214 /// want to drive the synth with it; the [`make_decoder`] factory
215 /// (called by the codec registry) builds one with the pure-tone
216 /// fallback because there's no instrument-discovery plumbing in
217 /// the factory signature yet.
218 pub fn new(instrument: Arc<dyn Instrument>, sample_rate: u32) -> Self {
219 Self {
220 codec_id: CodecId::new(CODEC_ID_STR),
221 instrument,
222 sample_rate,
223 scheduler: None,
224 mixer: Mixer::new(),
225 left: vec![0.0; FRAME_SAMPLES],
226 right: vec![0.0; FRAME_SAMPLES],
227 next_pts: 0,
228 drained: false,
229 finished: false,
230 tail_chunks_remaining: Self::TAIL_CHUNK_CAP,
231 }
232 }
233
234 /// Convenience constructor: same as [`new`](Self::new) but takes a
235 /// concrete [`Instrument`] by value and wraps it in an `Arc`.
236 pub fn with_instrument(instrument: Arc<dyn Instrument>) -> Self {
237 Self::new(instrument, OUTPUT_SAMPLE_RATE)
238 }
239
240 /// Build a decoder bound to an instrument loaded from a path on
241 /// disk. The format is dispatched by [`InstrumentSource`] so the
242 /// caller picks SFZ / SF2 / DLS explicitly (file extensions are
243 /// not always reliable indicators).
244 pub fn with_instrument_source(source: InstrumentSource) -> Result<Self> {
245 let inst = source.load()?;
246 Ok(Self::new(inst, OUTPUT_SAMPLE_RATE))
247 }
248}
249
/// Source descriptor for an external instrument bank. Paired with
/// [`MidiDecoder::with_instrument_source`] so a caller that only knows
/// "I have an SF2 file at this path" doesn't have to type the full
/// `Arc::new(Sf2Instrument::open(...))` chain. Nothing is read from
/// disk until [`InstrumentSource::load`] is called.
#[derive(Clone, Debug)]
pub enum InstrumentSource {
    /// SoundFont 2 bank — loaded via [`Sf2Instrument::open`].
    Sf2(PathBuf),
    /// SFZ text patch — loaded via [`SfzInstrument::open`] (samples
    /// are read off disk relative to the patch's directory).
    Sfz(PathBuf),
    /// DLS Level 1 / 2 bank — loaded via [`DlsInstrument::open`].
    Dls(PathBuf),
    /// Pure-tone fallback (no on-disk file).
    Tone,
}
266
267impl InstrumentSource {
268 /// Load the bank at the named path and wrap it in an `Arc<dyn
269 /// Instrument>`. Returns the underlying error (file-not-found,
270 /// invalid magic, malformed RIFF chunk, etc.) on failure.
271 pub fn load(self) -> Result<Arc<dyn Instrument>> {
272 match self {
273 InstrumentSource::Sf2(p) => Ok(Arc::new(Sf2Instrument::open(&p)?)),
274 InstrumentSource::Sfz(p) => Ok(Arc::new(SfzInstrument::open(&p)?)),
275 InstrumentSource::Dls(p) => Ok(Arc::new(DlsInstrument::open(&p)?)),
276 InstrumentSource::Tone => Ok(Arc::new(ToneInstrument::new())),
277 }
278 }
279
280 /// Convenience constructors mirroring the enum variants. Each
281 /// takes anything `Path`-convertible.
282 pub fn sf2(path: impl AsRef<Path>) -> Self {
283 Self::Sf2(path.as_ref().to_path_buf())
284 }
285 pub fn sfz(path: impl AsRef<Path>) -> Self {
286 Self::Sfz(path.as_ref().to_path_buf())
287 }
288 pub fn dls(path: impl AsRef<Path>) -> Self {
289 Self::Dls(path.as_ref().to_path_buf())
290 }
291}
292
293impl MidiDecoder {
294 /// Sample rate the decoder is rendering at. Equal to whatever was
295 /// passed to [`new`](Self::new) (default [`OUTPUT_SAMPLE_RATE`] when
296 /// constructed via the registry).
297 pub fn sample_rate(&self) -> u32 {
298 self.sample_rate
299 }
300
301 /// Borrow the active scheduler — `None` until the first
302 /// `send_packet`. Useful for diagnostics + tests.
303 pub fn scheduler(&self) -> Option<&Scheduler> {
304 self.scheduler.as_ref()
305 }
306
307 /// Convert the planar stereo `(left, right)` buffers into one
308 /// interleaved S16 [`AudioFrame`].
309 fn build_audio_frame(&mut self) -> Frame {
310 let n = self.left.len();
311 let mut bytes = Vec::with_capacity(n * 2 * 2); // 2 bytes/sample × 2 channels
312 for i in 0..n {
313 let l = (self.left[i].clamp(-1.0, 1.0) * 32_767.0) as i16;
314 let r = (self.right[i].clamp(-1.0, 1.0) * 32_767.0) as i16;
315 bytes.extend_from_slice(&l.to_le_bytes());
316 bytes.extend_from_slice(&r.to_le_bytes());
317 }
318 let pts = Some(self.next_pts);
319 self.next_pts = self.next_pts.saturating_add(n as i64);
320 Frame::Audio(AudioFrame {
321 samples: n as u32,
322 pts,
323 data: vec![bytes],
324 })
325 }
326}
327
328impl Decoder for MidiDecoder {
329 fn codec_id(&self) -> &CodecId {
330 &self.codec_id
331 }
332
333 fn send_packet(&mut self, packet: &Packet) -> Result<()> {
334 // Confirm the packet at least *looks* like an SMF — saves the
335 // user from a "synthesis pending" misdiagnosis when the real
336 // issue is a mis-routed packet.
337 if packet.data.len() < 4 || &packet.data[0..4] != b"MThd" {
338 return Err(Error::invalid(
339 "MIDI: packet does not start with the 'MThd' header chunk",
340 ));
341 }
342 let smf = crate::smf::parse(&packet.data)?;
343 // Prime the scheduler. Dropping the previous one (if any)
344 // discards any partially-played file — callers should call
345 // `flush` first if that matters.
346 self.scheduler = Some(Scheduler::new(&smf, self.sample_rate));
347 self.mixer.all_notes_off();
348 self.next_pts = 0;
349 self.drained = false;
350 self.finished = false;
351 self.tail_chunks_remaining = Self::TAIL_CHUNK_CAP;
352 Ok(())
353 }
354
355 fn receive_frame(&mut self) -> Result<Frame> {
356 if self.finished {
357 return Err(Error::Eof);
358 }
359 let scheduler = self.scheduler.as_mut().ok_or(Error::NeedMore)?;
360
361 // Step the scheduler over one chunk-worth of samples; this may
362 // dispatch any number of events into the mixer. The scheduler
363 // is `drained` either when it just transitioned to done, or
364 // when it was already done coming into this call (we keep
365 // running the mixer-only tail in that case).
366 let was_done = scheduler.is_done();
367 let now_done = scheduler.step(FRAME_SAMPLES, &mut self.mixer, self.instrument.as_ref());
368 if was_done || now_done {
369 self.drained = true;
370 }
371
372 // Mix down whatever the pool currently holds.
373 let active = self.mixer.mix_stereo(&mut self.left, &mut self.right);
374
375 // Termination: scheduler done AND no live voices AND we've
376 // already burned at least one tail chunk. The tail-chunk cap
377 // keeps a never-finishing voice (looping sample with no
378 // release fired) from holding the decoder open forever.
379 if self.drained {
380 if active == 0 || self.tail_chunks_remaining == 0 {
381 self.finished = true;
382 // Still hand back this final chunk (silent or near-silent)
383 // — the caller can decide to discard it. Returning Eof
384 // here would lose any release-tail samples.
385 return Ok(self.build_audio_frame());
386 }
387 self.tail_chunks_remaining = self.tail_chunks_remaining.saturating_sub(1);
388 }
389
390 Ok(self.build_audio_frame())
391 }
392
393 fn flush(&mut self) -> Result<()> {
394 // Mark the scheduler done so subsequent receive_frame calls
395 // run only the release tail.
396 if let Some(s) = self.scheduler.as_mut() {
397 // Drain by stepping a huge amount of samples — every event
398 // will fire, and the cursor will advance to the end. This
399 // is cheaper than re-engineering the scheduler API around
400 // an explicit "skip to end" entry point.
401 s.step(u32::MAX as usize, &mut self.mixer, self.instrument.as_ref());
402 }
403 Ok(())
404 }
405
406 fn reset(&mut self) -> Result<()> {
407 self.scheduler = None;
408 self.mixer.all_notes_off();
409 self.next_pts = 0;
410 self.drained = false;
411 self.finished = false;
412 self.tail_chunks_remaining = Self::TAIL_CHUNK_CAP;
413 Ok(())
414 }
415}
416
417#[cfg(test)]
418mod tests {
419 use super::*;
420 use oxideav_core::TimeBase;
421
/// Smallest valid SMF: an `MThd` header (format 0, one track,
/// division 96) followed by one `MTrk` holding only End-of-Track.
fn minimal_smf() -> Vec<u8> {
    b"MThd"
        .iter()
        .copied()
        .chain(6u32.to_be_bytes()) // header payload length
        .chain(0u16.to_be_bytes()) // format 0
        .chain(1u16.to_be_bytes()) // ntrks = 1
        .chain(96u16.to_be_bytes()) // division
        .chain(b"MTrk".iter().copied())
        .chain(4u32.to_be_bytes()) // track payload length
        .chain([0x00, 0xFF, 0x2F, 0x00]) // delta 0, End-of-Track
        .collect()
}
435
436 /// Build a 5-second SMF: tempo, two notes on channel 1, one note on
437 /// channel 10 (drums), a tempo change, and an EOT five seconds in.
438 fn five_second_smf() -> Vec<u8> {
439 // 480 ticks / qn at 120 BPM = 240 ticks / sec. Five seconds =
440 // 1200 ticks. Halfway tempo change (tick 600) to 250 000 us/qn
441 // (240 BPM) ⇒ second half is 240 ticks per second × 2 = 480
442 // ticks/sec — but we wrote 1200 ticks of "music" assuming the
443 // initial tempo so the wall-clock length will be ≈ 3.75 s, not
444 // a pure 5 s. That's fine: the test only asserts "non-silent
445 // PCM with a sensible duration", not exact timing.
446 let mut blob = Vec::new();
447 blob.extend_from_slice(b"MThd");
448 blob.extend_from_slice(&6u32.to_be_bytes());
449 blob.extend_from_slice(&1u16.to_be_bytes()); // format 1
450 blob.extend_from_slice(&3u16.to_be_bytes()); // 3 tracks
451 blob.extend_from_slice(&480u16.to_be_bytes()); // 480 tpqn
452
453 // Track 1: tempo + tempo change + EOT.
454 let mut t1: Vec<u8> = Vec::new();
455 // tick 0 set tempo 500_000 us/qn (= 120 BPM)
456 t1.extend_from_slice(&[0x00, 0xFF, 0x51, 0x03, 0x07, 0xA1, 0x20]);
457 // tick 600 set tempo 250_000 us/qn (= 240 BPM): VLQ(600) = [0x84, 0x58]
458 t1.extend_from_slice(&[0x84, 0x58, 0xFF, 0x51, 0x03, 0x03, 0xD0, 0x90]);
459 // tick 1200 EOT: VLQ(600) again
460 t1.extend_from_slice(&[0x84, 0x58, 0xFF, 0x2F, 0x00]);
461 push_track(&mut blob, &t1);
462
463 // Track 2: two notes on channel 1, played sequentially.
464 let mut t2: Vec<u8> = Vec::new();
465 // tick 0 note on chan 1 key 60 vel 100
466 t2.extend_from_slice(&[0x00, 0x91, 0x3C, 0x64]);
467 // tick 240 note off chan 1 key 60 vel 0; VLQ(240) = [0x81, 0x70]
468 t2.extend_from_slice(&[0x81, 0x70, 0x81, 0x3C, 0x40]);
469 // tick 240 + 0 note on chan 1 key 64 vel 100
470 t2.extend_from_slice(&[0x00, 0x91, 0x40, 0x64]);
471 // tick + 240 note off
472 t2.extend_from_slice(&[0x81, 0x70, 0x81, 0x40, 0x40]);
473 // tick + 720 EOT (so EOT at tick 1200): VLQ(720) = [0x85, 0x50]
474 t2.extend_from_slice(&[0x85, 0x50, 0xFF, 0x2F, 0x00]);
475 push_track(&mut blob, &t2);
476
477 // Track 3: one drum hit on channel 10 (index 9) — note 36 (kick).
478 let mut t3: Vec<u8> = Vec::new();
479 // tick 0 note on chan 9 key 36 vel 100
480 t3.extend_from_slice(&[0x00, 0x99, 0x24, 0x64]);
481 // tick 480 note off (VLQ 480 = [0x83, 0x60])
482 t3.extend_from_slice(&[0x83, 0x60, 0x89, 0x24, 0x40]);
483 // tick + 720 EOT
484 t3.extend_from_slice(&[0x85, 0x50, 0xFF, 0x2F, 0x00]);
485 push_track(&mut blob, &t3);
486
487 blob
488 }
489
/// Append one `MTrk` chunk to `blob`: the tag, the big-endian 32-bit
/// payload length, then the raw event bytes.
fn push_track(blob: &mut Vec<u8>, events: &[u8]) {
    let len_be = (events.len() as u32).to_be_bytes();
    blob.extend(b"MTrk".iter().chain(&len_be).chain(events));
}
495
/// `register_codecs` must make a decoder discoverable under the
/// public codec id.
#[test]
fn registers_codec_under_midi_id() {
    let mut registry = CodecRegistry::new();
    register_codecs(&mut registry);
    let midi_id = CodecId::new(CODEC_ID_STR);
    assert!(registry.has_decoder(&midi_id));
}
502
/// A packet that does not start with `MThd` must be rejected by
/// `send_packet` with an invalid-data error.
#[test]
fn decoder_rejects_non_smf_packets() {
    let mut reg = CodecRegistry::new();
    register_codecs(&mut reg);
    let params = CodecParameters::audio(CodecId::new(CODEC_ID_STR));
    // `&params` had been mangled into `¶ms` (HTML-entity decoding of
    // `&para`), which does not compile.
    let mut dec = reg.first_decoder(&params).unwrap();
    let pkt = Packet::new(0, TimeBase::new(1, 44_100), b"not midi".to_vec());
    let err = dec.send_packet(&pkt).unwrap_err();
    assert!(matches!(err, Error::InvalidData(_)));
}
513
/// An SMF holding nothing but End-of-Track yields one chunk and then
/// drains to `Error::Eof` within a few further calls.
#[test]
fn empty_smf_produces_eof_after_initial_chunks() {
    let mut reg = CodecRegistry::new();
    register_codecs(&mut reg);
    let params = CodecParameters::audio(CodecId::new(CODEC_ID_STR));
    // `&params` had been mangled into `¶ms` (HTML-entity decoding of
    // `&para`), which does not compile.
    let mut dec = reg.first_decoder(&params).unwrap();
    let pkt = Packet::new(0, TimeBase::new(1, 44_100), minimal_smf());
    dec.send_packet(&pkt).unwrap();
    // The empty-file SMF (one EOT, nothing else) drains immediately.
    // We should get one final near-silent chunk and then Eof.
    let _ = dec.receive_frame().expect("initial chunk");
    // Subsequent calls return Eof; stop polling at the first one.
    let got_eof = (0..4).any(|_| match dec.receive_frame() {
        Err(Error::Eof) => true,
        Ok(_) => false,
        Err(other) => panic!("unexpected error {other:?}"),
    });
    assert!(got_eof, "decoder should drain to Eof on an empty SMF");
}
539
/// End-to-end: the multi-track fixture (notes on two channels + a
/// tempo change) rendered through the tone fallback. Asserts frame
/// layout, a minimum duration, non-silence, and a sensible peak
/// amplitude.
#[test]
fn end_to_end_five_second_smf_produces_pcm() {
    let mut dec = MidiDecoder::new(Arc::new(ToneInstrument::new()), OUTPUT_SAMPLE_RATE);
    let pkt = Packet::new(0, TimeBase::new(1, 44_100), five_second_smf());
    dec.send_packet(&pkt).unwrap();

    let mut pcm: Vec<i16> = Vec::new();
    let mut frame_count = 0;
    // Bounded loop — 44_100 * 6 / 1024 ≈ 258 chunks would cover 6
    // seconds of audio. Cap at 1024 so a misbehaving decoder can't hang.
    for _ in 0..1024 {
        let af = match dec.receive_frame() {
            Ok(Frame::Audio(af)) => af,
            Ok(_) => panic!("expected Audio frame"),
            Err(Error::Eof) => break,
            Err(other) => panic!("unexpected error: {other:?}"),
        };
        assert_eq!(af.samples, FRAME_SAMPLES as u32);
        assert_eq!(af.data.len(), 1, "interleaved S16 = single plane");
        let bytes = &af.data[0];
        assert_eq!(bytes.len(), FRAME_SAMPLES * 4, "stereo S16 = 4 bytes/frame");
        pcm.extend(
            bytes
                .chunks_exact(2)
                .map(|pair| i16::from_le_bytes([pair[0], pair[1]])),
        );
        frame_count += 1;
    }

    // Both channels are interleaved — halve to get the per-channel
    // sample count.
    let per_channel = pcm.len() / 2;
    // The fixture spans 1200 ticks; the samples-per-tick rate halves
    // at tick 600 (120 → 240 BPM tempo change).
    //
    //   first half : 600 ticks * 45.9375  samples/tick = 27 562 samples
    //   second half: 600 ticks * 22.96875 samples/tick = 13 781 samples
    //   total music: ~41 344 samples (= ~0.94 s wall-clock)
    //
    // Lower bound: ≥ 30 000 samples (~680 ms) so a regression that
    // emits a single chunk and quits is caught.
    assert!(
        per_channel >= 30_000,
        "expected ≥ 30 k samples (~0.7 s) of audio, got {} samples / channel ({} frames)",
        per_channel,
        frame_count,
    );

    // Non-silence check: more than 5 % of samples must exceed a small
    // magnitude threshold (|s| > 16).
    let audible = pcm.iter().filter(|s| s.abs() > 16).count();
    let nonzero_ratio = audible as f64 / pcm.len() as f64;
    assert!(
        nonzero_ratio > 0.05,
        "audio is mostly silent: {:.2}% non-zero",
        nonzero_ratio * 100.0,
    );

    // Peak amplitude check: must be audible (> 1 % of i16 range) but
    // must stay below full scale, i.e. the mix never hit the clamp.
    let peak = pcm
        .iter()
        .fold(0u16, |loudest, s| loudest.max(s.unsigned_abs()));
    assert!(
        peak > 327,
        "peak {} too quiet — synth is producing near-silent output",
        peak,
    );
    assert!(
        peak < 32_767,
        "peak {} indicates clipping — mix bus should have headroom",
        peak,
    );
}
622
/// End-to-end with the in-memory SF2 fixture (a 20-frame looping ramp
/// at 22 050 Hz, root key 60). Exercises the full path
/// SMF → scheduler → SF2 voice generator → mixer → PCM.
#[test]
fn end_to_end_with_sf2_fixture() {
    use crate::instruments::sf2::Sf2Instrument;
    let bank_bytes = build_looping_sf2_fixture();
    let inst = Sf2Instrument::from_bytes("fixture", &bank_bytes).expect("parse fixture");
    let mut dec = MidiDecoder::new(Arc::new(inst), OUTPUT_SAMPLE_RATE);
    let pkt = Packet::new(0, TimeBase::new(1, 44_100), five_second_smf());
    dec.send_packet(&pkt).unwrap();

    // Bounded pull loop: stop at Eof, never spin forever.
    let mut pcm: Vec<i16> = Vec::new();
    for _ in 0..1024 {
        let af = match dec.receive_frame() {
            Ok(Frame::Audio(af)) => af,
            Err(Error::Eof) => break,
            Ok(_) => panic!("expected Audio frame"),
            Err(other) => panic!("error: {other:?}"),
        };
        pcm.extend(
            af.data[0]
                .chunks_exact(2)
                .map(|pair| i16::from_le_bytes([pair[0], pair[1]])),
        );
    }

    // Same per-channel lower bound as the tone-fallback test —
    // music is ~0.94 s wall-clock.
    let per_channel = pcm.len() / 2;
    assert!(
        per_channel >= 30_000,
        "expected ≥ 30 k samples / channel, got {}",
        pcm.len() / 2,
    );

    let audible = pcm.iter().filter(|s| s.abs() > 16).count();
    assert!(
        audible > pcm.len() / 20,
        "expected ≥ 5 % non-silent samples, got {} / {}",
        audible,
        pcm.len(),
    );

    let peak = pcm
        .iter()
        .fold(0u16, |loudest, s| loudest.max(s.unsigned_abs()));
    assert!(peak > 327, "SF2 fixture rendered too quiet (peak {peak})");
}
670
/// Build the same minimal looping SF2 the round-2 voice tests use:
/// one preset (program 0, bank 0), one instrument, one mono sample
/// — a 20-frame ramp at 22 050 Hz with `sampleModes=1` so the
/// voice keeps producing audio for the whole MIDI note duration.
/// Inlined here (rather than re-exported from `instruments::sf2`)
/// so the lib-level test stays self-contained.
///
/// Returns the complete `RIFF`/`sfbk` file image as bytes.
fn build_looping_sf2_fixture() -> Vec<u8> {
    // 20-frame ramp climbing from -8000 to +7200 in steps of 800,
    // stored as little-endian i16.
    let mut smpl_bytes = Vec::with_capacity(40);
    for i in 0i32..20 {
        let v = (i * 800 - 8000) as i16;
        smpl_bytes.extend_from_slice(&v.to_le_bytes());
    }

    // INFO list.
    let mut info = Vec::new();
    push_riff(&mut info, b"ifil", &[0x02, 0x00, 0x04, 0x00]); // 2.4
    push_riff(&mut info, b"INAM", b"MidiTestBank\0");
    let mut info_list = Vec::from(b"INFO" as &[u8]);
    info_list.extend_from_slice(&info);

    // sdta list.
    let mut sdta = Vec::new();
    push_riff(&mut sdta, b"smpl", &smpl_bytes);
    let mut sdta_list = Vec::from(b"sdta" as &[u8]);
    sdta_list.extend_from_slice(&sdta);

    // pdta list. Generators: sampleModes=54, sampleID=53, instrument=41.
    const GEN_SAMPLE_MODES: u16 = 54;
    const GEN_SAMPLE_ID: u16 = 53;
    const GEN_INSTRUMENT: u16 = 41;
    // Each table below ends with a terminal record ("EOP" / "EOI" /
    // "EOS", or an all-zero generator / modulator).
    let phdr = concat_records(&[
        phdr_record("Test Preset", 0, 0, 0),
        phdr_record("EOP", 0, 0, 1),
    ]);
    let pbag = concat_records(&[bag_record(0, 0), bag_record(1, 0)]);
    let pmod = vec![0u8; 10];
    let pgen = concat_records(&[gen_record(GEN_INSTRUMENT, 0), gen_record(0, 0)]);
    // NOTE(review): the terminal "EOI" record points at ibag index 2,
    // but `ibag` below holds only two records (indices 0 and 1); the
    // preset side uses index 1 for its equivalent terminal — confirm
    // this is intentional.
    let inst = concat_records(&[inst_record("Test Inst", 0), inst_record("EOI", 2)]);
    let ibag = concat_records(&[bag_record(0, 0), bag_record(2, 0)]);
    let imod = vec![0u8; 10];
    let igen = concat_records(&[
        gen_record(GEN_SAMPLE_MODES, 1),
        gen_record(GEN_SAMPLE_ID, 0),
        gen_record(0, 0),
    ]);
    // Sample header: frames 0..20, loop points 5..15, 22 050 Hz,
    // original key 60, no pitch correction, no link, sample type 1.
    let shdr = concat_records(&[
        shdr_record("RampLoop", 0, 20, 5, 15, 22_050, 60, 0, 0, 1),
        shdr_record("EOS", 0, 0, 0, 0, 0, 0, 0, 0, 0),
    ]);

    let mut pdta = Vec::new();
    push_riff(&mut pdta, b"phdr", &phdr);
    push_riff(&mut pdta, b"pbag", &pbag);
    push_riff(&mut pdta, b"pmod", &pmod);
    push_riff(&mut pdta, b"pgen", &pgen);
    push_riff(&mut pdta, b"inst", &inst);
    push_riff(&mut pdta, b"ibag", &ibag);
    push_riff(&mut pdta, b"imod", &imod);
    push_riff(&mut pdta, b"igen", &igen);
    push_riff(&mut pdta, b"shdr", &shdr);
    let mut pdta_list = Vec::from(b"pdta" as &[u8]);
    pdta_list.extend_from_slice(&pdta);

    // Outer RIFF/sfbk wrapper. The three LISTs are written in the
    // order INFO, sdta, pdta.
    let mut body = Vec::from(b"sfbk" as &[u8]);
    push_riff(&mut body, b"LIST", &info_list);
    push_riff(&mut body, b"LIST", &sdta_list);
    push_riff(&mut body, b"LIST", &pdta_list);
    let mut out = Vec::from(b"RIFF" as &[u8]);
    out.extend_from_slice(&(body.len() as u32).to_le_bytes());
    out.extend_from_slice(&body);
    out
}
745
/// Append one RIFF chunk to `out`: 4-byte tag, little-endian 32-bit
/// payload length, the payload, and a single zero pad byte when the
/// payload length is odd (the length field itself excludes the pad).
fn push_riff(out: &mut Vec<u8>, tag: &[u8; 4], payload: &[u8]) {
    let size_le = (payload.len() as u32).to_le_bytes();
    out.extend(tag.iter().chain(&size_le).chain(payload));
    let needs_pad = payload.len() & 1 != 0;
    if needs_pad {
        out.push(0);
    }
}
754
/// Flatten a list of fixed-size records into one contiguous byte
/// table, preserving record order.
fn concat_records(rs: &[Vec<u8>]) -> Vec<u8> {
    rs.concat()
}
762
/// Encode `s` as a 20-byte, zero-padded name field. At most the first
/// 19 bytes of `s` are copied, so the field always ends in a NUL.
fn name20(s: &str) -> [u8; 20] {
    let mut field = [0u8; 20];
    for (slot, byte) in field.iter_mut().zip(s.bytes().take(19)) {
        *slot = byte;
    }
    field
}
770
771 fn phdr_record(name: &str, program: u16, bank: u16, pbag_start: u16) -> Vec<u8> {
772 let mut r = vec![0u8; 38];
773 r[0..20].copy_from_slice(&name20(name));
774 r[20..22].copy_from_slice(&program.to_le_bytes());
775 r[22..24].copy_from_slice(&bank.to_le_bytes());
776 r[24..26].copy_from_slice(&pbag_start.to_le_bytes());
777 r
778 }
779
780 fn inst_record(name: &str, ibag_start: u16) -> Vec<u8> {
781 let mut r = vec![0u8; 22];
782 r[0..20].copy_from_slice(&name20(name));
783 r[20..22].copy_from_slice(&ibag_start.to_le_bytes());
784 r
785 }
786
/// Build one 4-byte bag record: little-endian generator start index
/// followed by little-endian modulator start index.
fn bag_record(gen_start: u16, mod_start: u16) -> Vec<u8> {
    [gen_start.to_le_bytes(), mod_start.to_le_bytes()].concat()
}
793
/// Build one 4-byte generator record: little-endian operator id
/// followed by the little-endian 16-bit amount.
fn gen_record(oper: u16, amount: u16) -> Vec<u8> {
    let [oper_lo, oper_hi] = oper.to_le_bytes();
    let [amount_lo, amount_hi] = amount.to_le_bytes();
    vec![oper_lo, oper_hi, amount_lo, amount_hi]
}
800
801 #[allow(clippy::too_many_arguments)]
802 fn shdr_record(
803 name: &str,
804 start: u32,
805 end: u32,
806 start_loop: u32,
807 end_loop: u32,
808 sample_rate: u32,
809 original_key: u8,
810 pitch_correction: i8,
811 sample_link: u16,
812 sample_type: u16,
813 ) -> Vec<u8> {
814 let mut r = vec![0u8; 46];
815 r[0..20].copy_from_slice(&name20(name));
816 r[20..24].copy_from_slice(&start.to_le_bytes());
817 r[24..28].copy_from_slice(&end.to_le_bytes());
818 r[28..32].copy_from_slice(&start_loop.to_le_bytes());
819 r[32..36].copy_from_slice(&end_loop.to_le_bytes());
820 r[36..40].copy_from_slice(&sample_rate.to_le_bytes());
821 r[40] = original_key;
822 r[41] = pitch_correction as u8;
823 r[42..44].copy_from_slice(&sample_link.to_le_bytes());
824 r[44..46].copy_from_slice(&sample_type.to_le_bytes());
825 r
826 }
827
/// End-to-end SMF with a pitch-bend event mid-note: feed the decoder
/// and pull frames until the scheduler has consumed every event.
///
/// The decoder API gives no direct view of the mixer's channel state
/// here, so the bend's effect is not inspected; the test only asserts
/// that the scheduler walked past the bend and finished.
#[test]
fn end_to_end_pitch_bend_event() {
    let mut dec = MidiDecoder::new(Arc::new(ToneInstrument::new()), OUTPUT_SAMPLE_RATE);
    let pkt = Packet::new(0, TimeBase::new(1, 44_100), pitch_bend_smf());
    dec.send_packet(&pkt).unwrap();

    // The bend sits at tick 480, i.e. one quarter note (0.5 s at the
    // file's 120 BPM tempo) into the track. Pull a bounded number of
    // frames so a decoder that never reports EOF cannot hang the test.
    for _ in 0..64 {
        if let Err(err) = dec.receive_frame() {
            match err {
                Error::Eof => break,
                other => panic!("unexpected: {other:?}"),
            }
        }
    }

    // Every scheduled event, the bend included, must have been dispatched.
    assert!(
        dec.scheduler().unwrap().is_done(),
        "scheduler should have drained the bend"
    );
}
853
854 /// SMF with: tempo, note-on at tick 0, pitch-bend max-up at tick
855 /// 480, note-off at tick 960, EOT at tick 1200.
856 fn pitch_bend_smf() -> Vec<u8> {
857 let mut blob = Vec::new();
858 blob.extend_from_slice(b"MThd");
859 blob.extend_from_slice(&6u32.to_be_bytes());
860 blob.extend_from_slice(&0u16.to_be_bytes());
861 blob.extend_from_slice(&1u16.to_be_bytes());
862 blob.extend_from_slice(&480u16.to_be_bytes());
863
864 let mut t: Vec<u8> = Vec::new();
865 // tick 0 set tempo 500_000 us/qn (= 120 BPM).
866 t.extend_from_slice(&[0x00, 0xFF, 0x51, 0x03, 0x07, 0xA1, 0x20]);
867 // tick 0 note on chan 0 key 60 vel 100.
868 t.extend_from_slice(&[0x00, 0x90, 0x3C, 0x64]);
869 // tick 480 pitch bend max-up. VLQ(480) = 83 60.
870 t.extend_from_slice(&[0x83, 0x60, 0xE0, 0x7F, 0x7F]);
871 // tick 480 → tick 960: note-off. VLQ(480) = 83 60.
872 t.extend_from_slice(&[0x83, 0x60, 0x80, 0x3C, 0x40]);
873 // tick + 240 EOT. VLQ(240) = 81 70.
874 t.extend_from_slice(&[0x81, 0x70, 0xFF, 0x2F, 0x00]);
875 push_track(&mut blob, &t);
876 blob
877 }
878
/// End-to-end SMF with a channel-aftertouch event mid-note: assert
/// the decoder doesn't crash and audio still gets produced.
///
/// "Audio gets produced" is checked as: at least one sample was
/// rendered, and more than 5 % of all samples have a magnitude above 16.
#[test]
fn end_to_end_channel_aftertouch_event() {
    let mut dec = MidiDecoder::new(Arc::new(ToneInstrument::new()), OUTPUT_SAMPLE_RATE);
    let blob = aftertouch_smf();
    let pkt = Packet::new(0, TimeBase::new(1, 44_100), blob);
    dec.send_packet(&pkt).unwrap();

    // Decode the first data plane of every audio frame as little-endian
    // 16-bit samples. NOTE(review): this assumes the decoder emits packed
    // s16le in `data[0]` — confirm against the decoder's output format.
    let mut samples: Vec<i16> = Vec::new();
    for _ in 0..64 {
        match dec.receive_frame() {
            Ok(Frame::Audio(af)) => samples.extend(
                af.data[0]
                    .chunks_exact(2)
                    .map(|pair| i16::from_le_bytes([pair[0], pair[1]])),
            ),
            Err(Error::Eof) => break,
            Ok(_) => panic!("expected audio"),
            Err(e) => panic!("unexpected: {e:?}"),
        }
    }

    // We rendered audio.
    assert!(!samples.is_empty(), "no audio rendered");
    // `unsigned_abs` rather than `abs`: `i16::MIN.abs()` overflows and
    // panics in debug/test builds, which would fail the test on a
    // full-scale negative sample instead of counting it as non-silent.
    let nonzero = samples.iter().filter(|s| s.unsigned_abs() > 16).count();
    assert!(
        nonzero > samples.len() / 20,
        "expected ≥ 5 % non-silent: {} / {}",
        nonzero,
        samples.len(),
    );
}
910
911 /// SMF with: tempo, note-on at tick 0, channel pressure at tick 240,
912 /// note-off at tick 480, EOT at tick 720.
913 fn aftertouch_smf() -> Vec<u8> {
914 let mut blob = Vec::new();
915 blob.extend_from_slice(b"MThd");
916 blob.extend_from_slice(&6u32.to_be_bytes());
917 blob.extend_from_slice(&0u16.to_be_bytes());
918 blob.extend_from_slice(&1u16.to_be_bytes());
919 blob.extend_from_slice(&480u16.to_be_bytes());
920
921 let mut t: Vec<u8> = Vec::new();
922 t.extend_from_slice(&[0x00, 0xFF, 0x51, 0x03, 0x07, 0xA1, 0x20]);
923 t.extend_from_slice(&[0x00, 0x90, 0x3C, 0x64]);
924 // VLQ(240) = 81 70. Channel pressure D0 with value 0x60.
925 t.extend_from_slice(&[0x81, 0x70, 0xD0, 0x60]);
926 // VLQ(240): note off.
927 t.extend_from_slice(&[0x81, 0x70, 0x80, 0x3C, 0x40]);
928 // VLQ(240): EOT.
929 t.extend_from_slice(&[0x81, 0x70, 0xFF, 0x2F, 0x00]);
930 push_track(&mut blob, &t);
931 blob
932 }
933
/// After `reset`, a decoder that had already started playing a file
/// must behave as if no packet was ever sent: the next
/// `receive_frame` reports `NeedMore`.
#[test]
fn reset_clears_scheduler_and_voices() {
    let mut dec = MidiDecoder::new(Arc::new(ToneInstrument::new()), OUTPUT_SAMPLE_RATE);
    let pkt = Packet::new(0, TimeBase::new(1, 44_100), five_second_smf());
    dec.send_packet(&pkt).unwrap();

    // Pull one frame so playback has actually started before the reset.
    drop(dec.receive_frame().unwrap());
    dec.reset().unwrap();

    // After reset, receive_frame returns NeedMore (no scheduler).
    let after_reset = dec.receive_frame();
    if let Err(Error::NeedMore) = after_reset {
        return;
    }
    panic!("expected NeedMore after reset, got {after_reset:?}");
}
947}