use oxideav_core::{Error, Result};
/// Largest number of bytes `read_vlq` will consume for a single variable-length quantity.
pub const MAX_VLQ_BYTES: usize = 4;
/// Cap on the cumulative number of track events accepted across all `MTrk` chunks of one file.
pub const MAX_EVENTS_PER_FILE: usize = 1_000_000;
/// File-level format declared in the first two payload bytes of the `MThd` chunk.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SmfFormat {
/// Raw format value 0.
SingleTrack,
/// Raw format value 1.
MultiTrackSimultaneous,
/// Raw format value 2.
MultiTrackIndependent,
}
impl SmfFormat {
fn from_u16(v: u16) -> Result<Self> {
match v {
0 => Ok(Self::SingleTrack),
1 => Ok(Self::MultiTrackSimultaneous),
2 => Ok(Self::MultiTrackIndependent),
other => Err(Error::invalid(format!(
"SMF: unknown format value {other} (expected 0, 1, or 2)",
))),
}
}
}
/// Timing division decoded from payload bytes 4-5 of the `MThd` chunk.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Division {
/// Bit 15 of the raw field is clear: the raw 16-bit value, which the header parser requires to be non-zero.
TicksPerQuarter(u16),
/// Bit 15 of the raw field is set: SMPTE-style division.
Smpte {
/// Negation of the signed high byte; the header parser only accepts 24, 25, 29, or 30.
frames_per_second: u8,
/// Low byte of the raw division field, stored as-is.
ticks_per_frame: u8,
},
}
/// Decoded contents of the `MThd` header chunk.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SmfHeader {
/// Format field (payload bytes 0-1).
pub format: SmfFormat,
/// Track count as declared by the header (payload bytes 2-3, big-endian).
/// `parse` does not check it against the number of `MTrk` chunks actually read.
pub ntrks: u16,
/// Timing division (payload bytes 4-5).
pub division: Division,
}
/// One entry of a track: a delta value followed by the event it precedes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TrackEvent {
/// Value of the variable-length quantity read immediately before the event.
pub delta: u32,
/// The decoded event itself.
pub kind: Event,
}
/// The three event families the track parser distinguishes by leading byte.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Event {
/// Channel-voice message (status 0x80-0xEF, given explicitly or inherited via running status).
Channel(ChannelMessage),
/// Event introduced by 0xF0 or 0xF7, followed by a length VLQ and that many bytes.
Sysex {
/// `true` when the introducing byte was 0xF7, `false` when it was 0xF0.
escape: bool,
/// The bytes that follow the length VLQ, copied verbatim.
data: Vec<u8>,
},
/// Event introduced by 0xFF.
Meta(MetaEvent),
}
/// A channel-voice message split into its channel number and decoded body.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ChannelMessage {
/// Low nibble of the status byte (0-15).
pub channel: u8,
/// Message selected by the high nibble of the status byte, with its data bytes.
pub body: ChannelBody,
}
/// Channel-voice message bodies; every `u8` field is a data byte with the high bit clear.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ChannelBody {
/// Status nibble 0x80.
NoteOff { key: u8, velocity: u8 },
/// Status nibble 0x90.
NoteOn { key: u8, velocity: u8 },
/// Status nibble 0xA0.
PolyAftertouch { key: u8, pressure: u8 },
/// Status nibble 0xB0.
ControlChange { controller: u8, value: u8 },
/// Status nibble 0xC0 (single data byte).
ProgramChange { program: u8 },
/// Status nibble 0xD0 (single data byte).
ChannelAftertouch { pressure: u8 },
/// Status nibble 0xE0; `value` is `(msb << 7) | lsb` of the two data bytes, LSB read first.
PitchBend { value: u16 },
}
/// Decoded meta events (introduced by 0xFF, a type byte, and a length VLQ).
///
/// A recognised fixed-size type whose payload length does not match the expected size
/// is reported as `Unknown` rather than rejected.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MetaEvent {
/// Type 0x00 with a 2-byte payload, read big-endian.
SequenceNumber(u16),
/// Types 0x01..=0x0F; `kind` is the type byte and `text` the raw, undecoded payload.
Text { kind: u8, text: Vec<u8> },
/// Type 0x20 with a 1-byte payload.
ChannelPrefix(u8),
/// Type 0x21 with a 1-byte payload.
Port(u8),
/// Type 0x2F with an empty payload; the track parser stops after it.
EndOfTrack,
/// Type 0x51 with a 3-byte payload, read as a 24-bit big-endian value.
Tempo(u32),
/// Type 0x54 with a 5-byte payload, one field per byte in payload order.
SmpteOffset {
hours: u8,
minutes: u8,
seconds: u8,
frames: u8,
subframes: u8,
},
/// Type 0x58 with a 4-byte payload, one field per byte in payload order.
TimeSignature {
numerator: u8,
denominator_pow2: u8,
clocks_per_click: u8,
notated_32nd_per_quarter: u8,
},
/// Type 0x59 with a 2-byte payload; the first byte is reinterpreted as a signed value.
KeySignature { sharps_flats: i8, mode: u8 },
/// Type 0x7F; payload copied verbatim.
SequencerSpecific(Vec<u8>),
/// Any other type byte, or a recognised type with an unexpected payload length.
Unknown { type_byte: u8, data: Vec<u8> },
}
/// The decoded contents of one `MTrk` chunk.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Track {
/// Events in file order; decoding stops after the first `EndOfTrack` meta event.
pub events: Vec<TrackEvent>,
}
/// A fully parsed file: the header plus every `MTrk` chunk that was found.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SmfFile {
/// Decoded `MThd` chunk.
pub header: SmfHeader,
/// Tracks in the order their `MTrk` chunks appear in the input.
pub tracks: Vec<Track>,
}
/// Parses a complete Standard MIDI File held in memory.
///
/// The buffer must begin with an `MThd` chunk. Every following chunk is read as a
/// 4-byte tag plus a big-endian 32-bit length; `MTrk` chunks are decoded into
/// [`Track`]s in file order and chunks carrying any other tag are skipped.
///
/// The track count declared in the header is not validated: `tracks` holds however
/// many `MTrk` chunks were actually present, while `header.ntrks` keeps the declared
/// value, so callers that care must compare the two themselves.
///
/// # Errors
///
/// Returns an error when the header is malformed, a chunk declares more bytes than
/// remain in `bytes`, a track fails to decode, or the cumulative event count exceeds
/// [`MAX_EVENTS_PER_FILE`].
pub fn parse(bytes: &[u8]) -> Result<SmfFile> {
    let mut cursor = Cursor::new(bytes);
    let header = parse_header(&mut cursor)?;
    let mut tracks: Vec<Track> = Vec::new();
    let mut total_events: usize = 0;
    while !cursor.is_empty() {
        let tag = cursor.take(4)?;
        let chunk_len = read_u32_be(&mut cursor)? as usize;
        // Checked before `take` so the error can name the offending chunk.
        if chunk_len > cursor.remaining() {
            return Err(Error::invalid(format!(
                "SMF: chunk '{}' declares {chunk_len} bytes but only {} remain",
                fmt_tag(tag),
                cursor.remaining(),
            )));
        }
        let payload = cursor.take(chunk_len)?;
        if tag != b"MTrk" {
            // Unrecognised chunk types are consumed and ignored.
            continue;
        }
        // The running total is handed down so the cap applies across all tracks.
        let track = parse_track(payload, total_events)?;
        total_events += track.events.len();
        if total_events > MAX_EVENTS_PER_FILE {
            return Err(Error::invalid(format!(
                "SMF: cumulative event count {total_events} exceeds cap of \
                 {MAX_EVENTS_PER_FILE}",
            )));
        }
        tracks.push(track);
    }
    Ok(SmfFile { header, tracks })
}
/// Reads the leading `MThd` chunk and decodes format, track count, and division.
///
/// Fails when the tag is not `MThd`, the declared length is below 6 or larger than
/// the remaining input, the format is not 0-2, a ticks-per-quarter division is zero,
/// or an SMPTE frame rate is not one of 24, 25, 29, 30. Payload bytes beyond the
/// first six are consumed and ignored.
fn parse_header(cursor: &mut Cursor<'_>) -> Result<SmfHeader> {
    let magic = cursor.take(4)?;
    if magic != b"MThd" {
        return Err(Error::invalid(format!(
            "SMF: expected 'MThd' header chunk, got '{}'",
            fmt_tag(magic),
        )));
    }

    let chunk_len = read_u32_be(cursor)? as usize;
    if chunk_len < 6 {
        return Err(Error::invalid(format!(
            "SMF: MThd chunk length is {chunk_len}, expected at least 6",
        )));
    }
    let available = cursor.remaining();
    if chunk_len > available {
        return Err(Error::invalid(format!(
            "SMF: MThd declares {chunk_len} bytes but only {} remain",
            available,
        )));
    }
    let body = cursor.take(chunk_len)?;

    // The length check above guarantees at least six payload bytes, so these
    // fixed offsets cannot go out of bounds.
    let word = |offset: usize| u16::from_be_bytes([body[offset], body[offset + 1]]);
    let format = SmfFormat::from_u16(word(0))?;
    let ntrks = word(2);
    let div_raw = word(4);

    let division = match div_raw {
        0 => {
            return Err(Error::invalid(
                "SMF: division of 0 ticks-per-quarter is not legal",
            ));
        }
        ticks if ticks & 0x8000 == 0 => Division::TicksPerQuarter(ticks),
        smpte => {
            // With bit 15 set the high byte is a negative i8; its magnitude is the frame rate.
            let frames_per_second = ((smpte >> 8) as i8).unsigned_abs();
            let ticks_per_frame = (smpte & 0xFF) as u8;
            if !matches!(frames_per_second, 24 | 25 | 29 | 30) {
                return Err(Error::invalid(format!(
                    "SMF: SMPTE frame rate {frames_per_second} not in {{24, 25, 29, 30}}",
                )));
            }
            Division::Smpte {
                frames_per_second,
                ticks_per_frame,
            }
        }
    };

    Ok(SmfHeader {
        format,
        ntrks,
        division,
    })
}
/// Decodes the payload of one `MTrk` chunk into a [`Track`].
///
/// Events are read as (delta VLQ, event) pairs until the payload is exhausted or an
/// `EndOfTrack` meta event has been stored; bytes after that event are ignored.
/// `events_so_far` is the number of events already accepted from earlier tracks, so
/// that the [`MAX_EVENTS_PER_FILE`] cap is enforced across the whole file.
fn parse_track(payload: &[u8], events_so_far: usize) -> Result<Track> {
    let mut reader = Cursor::new(payload);
    let mut running_status: Option<u8> = None;
    let mut local_total = events_so_far;
    let mut events = Vec::new();

    while !reader.is_empty() {
        let delta = read_vlq(&mut reader)?;
        let kind = read_event(&mut reader, &mut running_status)?;
        let reached_end = matches!(kind, Event::Meta(MetaEvent::EndOfTrack));

        local_total = local_total.saturating_add(1);
        if local_total > MAX_EVENTS_PER_FILE {
            return Err(Error::invalid(format!(
                "SMF: cumulative event count {local_total} exceeds cap of \
                 {MAX_EVENTS_PER_FILE}",
            )));
        }

        events.push(TrackEvent { delta, kind });
        if reached_end {
            break;
        }
    }

    Ok(Track { events })
}
fn read_event(cursor: &mut Cursor<'_>, running: &mut Option<u8>) -> Result<Event> {
let first = cursor.peek_u8()?;
if first == 0xFF {
cursor.advance(1)?;
let type_byte = cursor.read_u8()?;
let len = read_vlq(cursor)? as usize;
if len > cursor.remaining() {
return Err(Error::invalid(format!(
"SMF: meta event 0x{type_byte:02X} declares {len} bytes but only {} remain",
cursor.remaining(),
)));
}
let data = cursor.take(len)?;
*running = None;
Ok(Event::Meta(parse_meta(type_byte, data)?))
} else if first == 0xF0 || first == 0xF7 {
cursor.advance(1)?;
let len = read_vlq(cursor)? as usize;
if len > cursor.remaining() {
return Err(Error::invalid(format!(
"SMF: sysex 0x{first:02X} declares {len} bytes but only {} remain",
cursor.remaining(),
)));
}
let data = cursor.take(len)?.to_vec();
*running = None;
Ok(Event::Sysex {
escape: first == 0xF7,
data,
})
} else if first & 0x80 != 0 {
cursor.advance(1)?;
if first >= 0xF1 {
return Err(Error::invalid(format!(
"SMF: status byte 0x{first:02X} is System Common/Real-Time, \
not legal inside an MTrk chunk",
)));
}
*running = Some(first);
read_channel_message(cursor, first)
} else {
let status = running.ok_or_else(|| {
Error::invalid(format!(
"SMF: data byte 0x{first:02X} appeared without a prior status byte \
(no running status to inherit)",
))
})?;
read_channel_message(cursor, status)
}
}
/// Reads the data bytes of a channel-voice message whose status byte is `status`.
///
/// The high nibble selects the message and the low nibble is the channel. Program
/// change (0xC0) and channel aftertouch (0xD0) take one data byte; all other
/// messages take two. Callers only pass status bytes in 0x80..=0xEF, so any other
/// nibble is treated as a programming error.
fn read_channel_message(cursor: &mut Cursor<'_>, status: u8) -> Result<Event> {
    let kind = status & 0xF0;
    let body = match kind {
        0xC0 => ChannelBody::ProgramChange {
            program: cursor.read_data_byte()?,
        },
        0xD0 => ChannelBody::ChannelAftertouch {
            pressure: cursor.read_data_byte()?,
        },
        0x80 | 0x90 | 0xA0 | 0xB0 | 0xE0 => {
            let first = cursor.read_data_byte()?;
            let second = cursor.read_data_byte()?;
            match kind {
                0x80 => ChannelBody::NoteOff {
                    key: first,
                    velocity: second,
                },
                0x90 => ChannelBody::NoteOn {
                    key: first,
                    velocity: second,
                },
                0xA0 => ChannelBody::PolyAftertouch {
                    key: first,
                    pressure: second,
                },
                0xB0 => ChannelBody::ControlChange {
                    controller: first,
                    value: second,
                },
                // Only 0xE0 remains: LSB arrives first, MSB second.
                _ => ChannelBody::PitchBend {
                    value: (u16::from(second) << 7) | u16::from(first),
                },
            }
        }
        _ => unreachable!("status nibble {kind:02X} is not a channel-voice message"),
    };
    Ok(Event::Channel(ChannelMessage {
        channel: status & 0x0F,
        body,
    }))
}
/// Turns a meta event's type byte and payload into a [`MetaEvent`].
///
/// Fixed-size types are only recognised when the payload has exactly the expected
/// length; otherwise (and for every unlisted type byte) the event is preserved as
/// `MetaEvent::Unknown`. This function never returns an error.
fn parse_meta(type_byte: u8, data: &[u8]) -> Result<MetaEvent> {
    let event = match (type_byte, data) {
        (0x00, &[hi, lo]) => MetaEvent::SequenceNumber(u16::from_be_bytes([hi, lo])),
        (0x01..=0x0F, _) => MetaEvent::Text {
            kind: type_byte,
            text: data.to_vec(),
        },
        (0x20, &[channel]) => MetaEvent::ChannelPrefix(channel),
        (0x21, &[port]) => MetaEvent::Port(port),
        (0x2F, &[]) => MetaEvent::EndOfTrack,
        // 24-bit big-endian value widened with a zero top byte.
        (0x51, &[b0, b1, b2]) => MetaEvent::Tempo(u32::from_be_bytes([0, b0, b1, b2])),
        (0x54, &[hours, minutes, seconds, frames, subframes]) => MetaEvent::SmpteOffset {
            hours,
            minutes,
            seconds,
            frames,
            subframes,
        },
        (0x58, &[numerator, denominator_pow2, clocks_per_click, notated_32nd_per_quarter]) => {
            MetaEvent::TimeSignature {
                numerator,
                denominator_pow2,
                clocks_per_click,
                notated_32nd_per_quarter,
            }
        }
        (0x59, &[sharps_flats, mode]) => MetaEvent::KeySignature {
            sharps_flats: sharps_flats as i8,
            mode,
        },
        (0x7F, _) => MetaEvent::SequencerSpecific(data.to_vec()),
        _ => MetaEvent::Unknown {
            type_byte,
            data: data.to_vec(),
        },
    };
    Ok(event)
}
/// Forward-only reader over a borrowed byte slice.
struct Cursor<'a> {
/// The full input being read.
bytes: &'a [u8],
/// Index into `bytes` of the next unread byte.
pos: usize,
}
impl<'a> Cursor<'a> {
fn new(bytes: &'a [u8]) -> Self {
Self { bytes, pos: 0 }
}
fn remaining(&self) -> usize {
self.bytes.len() - self.pos
}
fn is_empty(&self) -> bool {
self.pos >= self.bytes.len()
}
fn take(&mut self, n: usize) -> Result<&'a [u8]> {
if self.remaining() < n {
return Err(Error::invalid(format!(
"SMF: short read — wanted {n} bytes, {} remain",
self.remaining()
)));
}
let s = &self.bytes[self.pos..self.pos + n];
self.pos += n;
Ok(s)
}
fn read_u8(&mut self) -> Result<u8> {
Ok(self.take(1)?[0])
}
fn read_data_byte(&mut self) -> Result<u8> {
let b = self.read_u8()?;
if b & 0x80 != 0 {
return Err(Error::invalid(format!(
"SMF: expected data byte (high bit clear), got 0x{b:02X}",
)));
}
Ok(b)
}
fn peek_u8(&self) -> Result<u8> {
if self.is_empty() {
return Err(Error::invalid("SMF: short read — wanted 1 byte, 0 remain"));
}
Ok(self.bytes[self.pos])
}
fn advance(&mut self, n: usize) -> Result<()> {
if self.remaining() < n {
return Err(Error::invalid(format!(
"SMF: short advance — wanted {n} bytes, {} remain",
self.remaining()
)));
}
self.pos += n;
Ok(())
}
}
/// Consumes four bytes and assembles them as a big-endian `u32`.
fn read_u32_be(cursor: &mut Cursor<'_>) -> Result<u32> {
    let raw = cursor.take(4)?;
    Ok(raw
        .iter()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte)))
}
fn read_vlq(cursor: &mut Cursor<'_>) -> Result<u32> {
let mut value: u32 = 0;
for i in 0..MAX_VLQ_BYTES {
let b = cursor.read_u8()?;
value = (value << 7) | ((b & 0x7F) as u32);
if b & 0x80 == 0 {
return Ok(value);
}
if i == MAX_VLQ_BYTES - 1 {
return Err(Error::invalid(format!(
"SMF: VLQ exceeded {MAX_VLQ_BYTES}-byte cap (continuation bit set on final byte)",
)));
}
}
unreachable!("loop returns or errors before this point");
}
/// Renders a chunk tag for error messages, substituting U+FFFD for invalid UTF-8.
fn fmt_tag(tag: &[u8]) -> String {
    let lossy = String::from_utf8_lossy(tag);
    lossy.to_string()
}
#[cfg(test)]
mod tests {
use super::*;
/// Test helper: encodes `v` as a variable-length quantity (most significant group first).
fn encode_vlq(mut v: u32) -> Vec<u8> {
    // Collect 7-bit groups least-significant first, then flip to on-wire order.
    let mut groups = vec![(v & 0x7F) as u8];
    v >>= 7;
    while v != 0 {
        groups.push((v & 0x7F) as u8 | 0x80);
        v >>= 7;
    }
    groups.reverse();
    groups
}
/// Test helper: builds a 14-byte `MThd` chunk with a declared length of 6.
fn header_chunk(format: u16, ntrks: u16, division: u16) -> Vec<u8> {
    let mut chunk = Vec::with_capacity(14);
    chunk.extend_from_slice(b"MThd");
    chunk.extend_from_slice(&6u32.to_be_bytes());
    for field in &[format, ntrks, division] {
        chunk.extend_from_slice(&field.to_be_bytes());
    }
    chunk
}
/// Test helper: wraps raw event bytes in an `MTrk` chunk with a matching length field.
fn track_chunk(events: &[u8]) -> Vec<u8> {
    let declared_len = (events.len() as u32).to_be_bytes();
    let mut chunk = Vec::with_capacity(8 + events.len());
    chunk.extend_from_slice(b"MTrk");
    chunk.extend_from_slice(&declared_len);
    chunk.extend_from_slice(events);
    chunk
}
// A single byte with the continuation bit clear decodes to its own value.
#[test]
fn vlq_one_byte() {
let mut c = Cursor::new(&[0x00]);
assert_eq!(read_vlq(&mut c).unwrap(), 0);
let mut c = Cursor::new(&[0x40]);
assert_eq!(read_vlq(&mut c).unwrap(), 0x40);
let mut c = Cursor::new(&[0x7F]);
assert_eq!(read_vlq(&mut c).unwrap(), 0x7F);
}
// Two-, three- and four-byte VLQs: each case is checked in both directions
// (decode via `read_vlq`, encode via the `encode_vlq` helper).
#[test]
fn vlq_multi_byte() {
let cases: &[(u32, &[u8])] = &[
(0x80, &[0x81, 0x00]),
(0x2000, &[0xC0, 0x00]),
(0x3FFF, &[0xFF, 0x7F]),
(0x10_0000, &[0xC0, 0x80, 0x00]),
(0x1F_FFFF, &[0xFF, 0xFF, 0x7F]),
(0x20_0000, &[0x81, 0x80, 0x80, 0x00]),
(0x0FFF_FFFF, &[0xFF, 0xFF, 0xFF, 0x7F]),
];
for (v, bytes) in cases {
let mut c = Cursor::new(bytes);
assert_eq!(
read_vlq(&mut c).unwrap(),
*v,
"decode VLQ {v:#x} from {bytes:?}",
);
assert_eq!(encode_vlq(*v), bytes.to_vec(), "round-trip VLQ {v:#x}");
}
}
// The fourth byte still has its continuation bit set, so decoding must fail
// before the fifth byte is ever read.
#[test]
fn vlq_rejects_5_byte() {
let mut c = Cursor::new(&[0xFF, 0xFF, 0xFF, 0xFF, 0x7F]);
let err = read_vlq(&mut c).unwrap_err();
assert!(matches!(err, Error::InvalidData(_)));
}
// Minimal format-0 file: header with division 480 plus one track holding only End-of-Track.
#[test]
fn header_format_0_ticks_per_quarter() {
let mut blob = header_chunk(0, 1, 480);
blob.extend(track_chunk(&[0x00, 0xFF, 0x2F, 0x00]));
let smf = parse(&blob).unwrap();
assert_eq!(smf.header.format, SmfFormat::SingleTrack);
assert_eq!(smf.header.ntrks, 1);
assert_eq!(smf.header.division, Division::TicksPerQuarter(480));
assert_eq!(smf.tracks.len(), 1);
assert_eq!(smf.tracks[0].events.len(), 1);
assert!(matches!(
smf.tracks[0].events[0].kind,
Event::Meta(MetaEvent::EndOfTrack)
));
}
// Division 0xE728: high byte 0xE7 is -25 as a signed byte (25 fps), low byte 0x28 is 40 ticks per frame.
#[test]
fn header_smpte_division() {
let div = u16::from_be_bytes([0xE7, 0x28]);
let mut blob = header_chunk(0, 1, div);
blob.extend(track_chunk(&[0x00, 0xFF, 0x2F, 0x00]));
let smf = parse(&blob).unwrap();
assert_eq!(
smf.header.division,
Division::Smpte {
frames_per_second: 25,
ticks_per_frame: 40,
},
);
}
// End-to-end format-0 track: tempo, time signature, a NoteOn/NoteOff pair separated
// by a two-byte delta of 480, then End-of-Track. Checks each decoded event in order.
#[test]
fn type_0_single_track_with_note_pair_and_tempo() {
let mut events = vec![];
events.extend_from_slice(&[0x00, 0xFF, 0x51, 0x03, 0x07, 0xA1, 0x20]);
events.extend_from_slice(&[0x00, 0xFF, 0x58, 0x04, 0x04, 0x02, 0x18, 0x08]);
events.extend_from_slice(&[0x00, 0x90, 0x3C, 0x64]);
events.extend_from_slice(&encode_vlq(480));
events.extend_from_slice(&[0x80, 0x3C, 0x40]);
events.extend_from_slice(&[0x00, 0xFF, 0x2F, 0x00]);
let mut blob = header_chunk(0, 1, 480);
blob.extend(track_chunk(&events));
let smf = parse(&blob).unwrap();
assert_eq!(smf.header.format, SmfFormat::SingleTrack);
let evs = &smf.tracks[0].events;
assert_eq!(evs.len(), 5);
assert!(matches!(
evs[0].kind,
Event::Meta(MetaEvent::Tempo(500_000))
));
assert!(matches!(
evs[1].kind,
Event::Meta(MetaEvent::TimeSignature {
numerator: 4,
denominator_pow2: 2,
clocks_per_click: 24,
notated_32nd_per_quarter: 8,
})
));
match &evs[2].kind {
Event::Channel(ChannelMessage {
channel: 0,
body:
ChannelBody::NoteOn {
key: 60,
velocity: 100,
},
}) => {}
other => panic!("unexpected event #2: {other:?}"),
}
assert_eq!(evs[3].delta, 480);
match &evs[3].kind {
Event::Channel(ChannelMessage {
channel: 0,
body:
ChannelBody::NoteOff {
key: 60,
velocity: 0x40,
},
}) => {}
other => panic!("unexpected event #3: {other:?}"),
}
assert!(matches!(evs[4].kind, Event::Meta(MetaEvent::EndOfTrack)));
}
// Only the first NoteOn carries its 0x90 status byte; the next two messages start
// directly with a data byte and must inherit that status.
#[test]
fn running_status_is_honoured() {
let events: &[u8] = &[
0x00, 0x90, 0x3C, 0x64, 0x0A, 0x3D, 0x64, 0x0A, 0x3E, 0x64, 0x00, 0xFF, 0x2F, 0x00,
];
let mut blob = header_chunk(0, 1, 96);
blob.extend(track_chunk(events));
let smf = parse(&blob).unwrap();
let evs = &smf.tracks[0].events;
assert_eq!(evs.len(), 4);
for (i, &expected_key) in [60u8, 61, 62].iter().enumerate() {
match &evs[i].kind {
Event::Channel(ChannelMessage {
channel: 0,
body: ChannelBody::NoteOn { key, velocity: 100 },
}) if *key == expected_key => {}
other => panic!("event #{i}: expected NoteOn key={expected_key}, got {other:?}"),
}
}
}
// Format-1 file with two MTrk chunks: a meta-only track and a channel-1 note track
// whose second event uses a two-byte delta (0x2000).
#[test]
fn type_1_multi_track() {
let track1: &[u8] = &[
0x00, 0xFF, 0x51, 0x03, 0x07, 0xA1, 0x20, 0x00, 0xFF, 0x58, 0x04, 0x04, 0x02, 0x18,
0x08, 0x00, 0xFF, 0x2F, 0x00,
];
let mut track2 = vec![0x00, 0x91, 0x40, 0x5A];
track2.extend_from_slice(&encode_vlq(0x2000));
track2.extend_from_slice(&[0x81, 0x40, 0x40, 0x00, 0xFF, 0x2F, 0x00]);
let mut blob = header_chunk(1, 2, 480);
blob.extend(track_chunk(track1));
blob.extend(track_chunk(&track2));
let smf = parse(&blob).unwrap();
assert_eq!(smf.header.format, SmfFormat::MultiTrackSimultaneous);
assert_eq!(smf.tracks.len(), 2);
assert_eq!(smf.tracks[0].events.len(), 3);
assert_eq!(smf.tracks[1].events.len(), 3);
match &smf.tracks[1].events[0].kind {
Event::Channel(ChannelMessage {
channel: 1,
body:
ChannelBody::NoteOn {
key: 64,
velocity: 90,
},
}) => {}
other => panic!("track 2 event 0 unexpected: {other:?}"),
}
assert_eq!(smf.tracks[1].events[1].delta, 0x2000);
}
// A chunk tagged "XYZW" sits between the header and the track; it must be consumed
// and ignored, leaving exactly one parsed track.
#[test]
fn unknown_chunk_is_skipped() {
let mut blob = header_chunk(0, 1, 96);
blob.extend_from_slice(b"XYZW");
blob.extend_from_slice(&3u32.to_be_bytes());
blob.extend_from_slice(&[0xAA, 0xBB, 0xCC]);
blob.extend(track_chunk(&[0x00, 0xFF, 0x2F, 0x00]));
let smf = parse(&blob).unwrap();
assert_eq!(smf.tracks.len(), 1);
}
// Meta type 0x03 with a 6-byte payload is surfaced as a Text event carrying the raw bytes.
#[test]
fn meta_text_events() {
let mut events = vec![0x00, 0xFF, 0x03, 0x06];
events.extend_from_slice(b"Track1");
events.extend_from_slice(&[0x00, 0xFF, 0x2F, 0x00]);
let mut blob = header_chunk(0, 1, 96);
blob.extend(track_chunk(&events));
let smf = parse(&blob).unwrap();
match &smf.tracks[0].events[0].kind {
Event::Meta(MetaEvent::Text { kind: 0x03, text }) => {
assert_eq!(text, b"Track1");
}
other => panic!("expected text event, got {other:?}"),
}
}
// Pitch bend data bytes LSB=0x00, MSB=0x40 combine to (0x40 << 7) | 0x00 = 0x2000.
#[test]
fn pitch_bend_combines_lsb_msb() {
let events = [0x00, 0xE0, 0x00, 0x40, 0x00, 0xFF, 0x2F, 0x00];
let mut blob = header_chunk(0, 1, 96);
blob.extend(track_chunk(&events));
let smf = parse(&blob).unwrap();
assert_eq!(smf.tracks[0].events[0].delta, 0);
match &smf.tracks[0].events[0].kind {
Event::Channel(ChannelMessage {
channel: 0,
body: ChannelBody::PitchBend { value: 0x2000 },
}) => {}
other => panic!("expected pitch bend 0x2000, got {other:?}"),
}
}
// 0xF0 with a declared length of 4: the four following bytes become the sysex
// payload and `escape` is false.
#[test]
fn sysex_event() {
let events = [
0x00, 0xF0, 0x04, 0x7E, 0x7F, 0x09, 0x01, 0x00, 0xFF, 0x2F, 0x00,
];
let mut blob = header_chunk(0, 1, 96);
blob.extend(track_chunk(&events));
let smf = parse(&blob).unwrap();
match &smf.tracks[0].events[0].kind {
Event::Sysex {
escape: false,
data,
} => assert_eq!(data, &[0x7E, 0x7F, 0x09, 0x01]),
other => panic!("expected sysex, got {other:?}"),
}
}
// The MThd chunk declares 60 payload bytes but only 6 follow, so parsing must fail.
#[test]
fn rejects_chunk_length_overrun() {
let mut blob = vec![];
blob.extend_from_slice(b"MThd");
blob.extend_from_slice(&60u32.to_be_bytes());
blob.extend_from_slice(&[0; 6]);
let err = parse(&blob).unwrap_err();
assert!(matches!(err, Error::InvalidData(_)));
}
// The meta event's length VLQ (0xFF 0x7F = 0x3FFF) exceeds the zero bytes left in the track.
#[test]
fn rejects_meta_length_overrun() {
let events: &[u8] = &[0x00, 0xFF, 0x03, 0xFF, 0x7F];
let mut blob = header_chunk(0, 1, 96);
blob.extend(track_chunk(events));
let err = parse(&blob).unwrap_err();
assert!(matches!(err, Error::InvalidData(_)));
}
// The first event starts with a data byte (0x40) although no status byte has been
// seen yet, so there is no running status to fall back on.
#[test]
fn rejects_data_byte_without_status() {
let events: &[u8] = &[0x00, 0x40, 0x40];
let mut blob = header_chunk(0, 1, 96);
blob.extend(track_chunk(events));
let err = parse(&blob).unwrap_err();
assert!(matches!(err, Error::InvalidData(_)));
}
// Status byte 0xF1 falls in the range (0xF1 and above) that the track parser rejects.
#[test]
fn rejects_system_common_in_track() {
let events: &[u8] = &[0x00, 0xF1, 0x40];
let mut blob = header_chunk(0, 1, 96);
blob.extend(track_chunk(events));
let err = parse(&blob).unwrap_err();
assert!(matches!(err, Error::InvalidData(_)));
}
}