Skip to main content

oxideav_mod/
stm.rs

1//! Scream Tracker v1.0 ("STM") module parsing.
2//!
3//! STM is the pre-S3M module format used by Scream Tracker 1.x. It's a
4//! smaller, 4-channel-fixed tracker format with Intel (little-endian)
5//! byte order, 31 instruments, and 64-row patterns stored as
6//! `64 rows * 4 channels * 4 bytes` blocks of fixed-size cells.
7//!
8//! Layout summary (see `docs/audio/trackers/stm/ScreamTracker-v1.0-stm.txt`):
9//!
10//! ```text
11//! Offset 0x000    20 bytes   ASCIIZ song name
12//! Offset 0x014     8 bytes   Tracker name ("!Scream!" typically)
13//! Offset 0x01C     1 byte    ID = 0x1A
14//! Offset 0x01D     1 byte    File type: 1 = song (no samples), 2 = module
15//! Offset 0x01E     1 byte    Major version
16//! Offset 0x01F     1 byte    Minor version
17//! Offset 0x020     1 byte    Playback tempo
18//! Offset 0x021     1 byte    Number of patterns ("PAT" in the doc)
19//! Offset 0x022     1 byte    Global playback volume
20//! Offset 0x023    13 bytes   reserved
21//! Offset 0x030    31 * 32    Instrument records (32 bytes each):
22//!                              12 char ASCIIZ instrument name
23//!                               1 byte ID = 0
24//!                               1 byte instrument disk
25//!                               2 bytes reserved
26//!                               2 bytes sample length (bytes, LE)
27//!                               2 bytes sample loop start (LE)
28//!                               2 bytes sample loop end   (LE)
29//!                               1 byte  default volume
30//!                               1 byte  reserved
31//!                               2 bytes C3 frequency (LE)
32//!                               4 bytes reserved
33//!                               2 bytes length in paragraphs (modules only)
34//! Offset 0x3D0    64 bytes   Pattern order table (entries 0..=63)
35//! Offset 0x410    PAT*1024   Pattern data (64 rows × 4 ch × 4 bytes each)
36//! Then            …          Raw sample bodies (16-byte padded).
37//! ```
38//!
39//! This module only parses structural metadata + sample extraction;
40//! playback is not currently wired to the MOD mixer (STM uses C3
41//! frequencies rather than Amiga periods, so the pitch math differs).
42//! The Demuxer emits a single packet carrying the entire file; a decoder
43//! looking to play STM can parse the tables exposed here and render.
44
45use oxideav_core::{Error, Result};
46
/// Size in bytes of the fixed STM header that precedes the instrument table.
pub const HEADER_PREFIX_SIZE: usize = 0x30;
/// Bytes occupied by a single instrument record.
pub const INSTRUMENT_RECORD_SIZE: usize = 32;
/// How many instrument records an STM file carries.
pub const INSTRUMENT_COUNT: usize = 31;
/// File offset of the pattern order table.
///
/// NOTE(review): `HEADER_PREFIX_SIZE + INSTRUMENT_COUNT * INSTRUMENT_RECORD_SIZE`
/// is 0x410, so a table starting at 0x3D0 overlaps the last two instrument
/// records as laid out by these constants — confirm against the format spec.
pub const ORDER_TABLE_OFFSET: usize = 0x3D0;
/// Length of the pattern order table in bytes (one byte per entry).
pub const ORDER_TABLE_SIZE: usize = 64;
/// File offset of the first pattern; the order table ends here (0x410).
pub const PATTERN_DATA_OFFSET: usize = ORDER_TABLE_OFFSET + ORDER_TABLE_SIZE;
/// Row count of every STM pattern.
pub const PATTERN_ROWS: usize = 64;
/// Channel count; STM is fixed at 4.
pub const STM_CHANNELS: usize = 4;
/// Bytes per pattern cell (note, instrument/volume, command/volume,
/// command parameter).
pub const CELL_BYTES: usize = 4;
/// Bytes per pattern: 4 bytes * 4 channels * 64 rows = 1024.
pub const BYTES_PER_PATTERN: usize = CELL_BYTES * STM_CHANNELS * PATTERN_ROWS;
67
/// Interpretation of the file-type byte stored at offset 0x1D.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StmFileType {
    /// Value 1: song data only, no samples.
    Song,
    /// Value 2: module with samples.
    Module,
    /// Any other value, carried through unchanged.
    Other(u8),
}

impl From<u8> for StmFileType {
    /// Maps the raw byte onto a variant; unrecognised values land in
    /// [`StmFileType::Other`] instead of being rejected.
    fn from(v: u8) -> Self {
        if v == 1 {
            Self::Song
        } else if v == 2 {
            Self::Module
        } else {
            Self::Other(v)
        }
    }
}
87
/// Decoded STM instrument record (pre-sample-extraction).
///
/// `parse_header` produces one of these per 32-byte record of the
/// instrument table; the PCM itself is pulled out separately by
/// `extract_samples`.
#[derive(Clone, Debug, Default)]
pub struct StmInstrument {
    /// Instrument name: the record's 12-byte name field cut at the first
    /// NUL, with trailing whitespace removed.
    pub name: String,
    /// "Instrument disk" — legacy field, kept for fidelity.
    pub disk: u8,
    /// Sample length in bytes (raw 16-bit LE from the file).
    pub length: u16,
    /// Loop start, in samples (byte offset for 8-bit PCM).
    pub loop_start: u16,
    /// Loop end, in samples. 0xFFFF commonly indicates "no loop".
    pub loop_end: u16,
    /// Default volume 0..=64 (the parser clamps larger raw values to 64).
    pub volume: u8,
    /// C3 frequency in Hz.
    pub c3_hz: u16,
    /// Length in paragraphs (module files only); read from the last two
    /// bytes of the record.
    pub paragraphs: u16,
}
107
/// Top-level STM header.
///
/// Produced by `parse_header`; bundles the fixed header fields with the
/// decoded instrument table and the raw order table.
#[derive(Clone, Debug)]
pub struct StmHeader {
    /// Song name from the 20-byte field at offset 0x00 (cut at the first
    /// NUL, trailing whitespace removed).
    pub title: String,
    /// Tracker name from the 8-byte field at offset 0x14.
    pub tracker_name: String,
    /// File type decoded from the byte at offset 0x1D.
    pub file_type: StmFileType,
    /// Major version byte (offset 0x1E).
    pub version_major: u8,
    /// Minor version byte (offset 0x1F).
    pub version_minor: u8,
    /// Raw playback tempo byte (offset 0x20).
    pub tempo: u8,
    /// Number of patterns stored in the file (offset 0x21).
    pub n_patterns: u8,
    /// Raw global playback volume byte (offset 0x22); not clamped.
    pub global_volume: u8,
    /// The `INSTRUMENT_COUNT` instrument records, in file order.
    pub instruments: Vec<StmInstrument>,
    /// Raw copy of the `ORDER_TABLE_SIZE`-byte pattern order table.
    pub order: Vec<u8>,
}
122
/// A single decoded pattern cell: one channel of one row.
#[derive(Clone, Copy, Debug, Default)]
pub struct StmCell {
    /// Undecoded note byte; [`StmCell::kind`] gives its interpretation.
    pub note_raw: u8,
    /// Instrument index 0..=31 (0 = no instrument change).
    pub instrument: u8,
    /// Volume, assembled by the pattern parser from bits of cell bytes 1
    /// and 2 and capped at 64.
    pub volume: u8,
    /// Effect command nibble (0..=0xF).
    pub command: u8,
    /// Parameter byte belonging to `command`.
    pub command_param: u8,
}

impl StmCell {
    /// Interpret `note_raw`.
    ///
    /// Values 251..=255 are markers; anything below is a pitch with the
    /// octave in the high nibble and the semitone in the low nibble.
    pub fn kind(&self) -> StmNoteKind {
        let raw = self.note_raw;
        if raw < 251 {
            return StmNoteKind::Note {
                octave: raw >> 4,
                semitone: raw & 0x0F,
            };
        }
        match raw {
            251 => StmNoteKind::Empty,
            252 => StmNoteKind::DashNote,
            253 => StmNoteKind::Dots,
            // Only 254 and 255 remain.
            _ => StmNoteKind::Reserved,
        }
    }
}

/// What an STM note byte stands for.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StmNoteKind {
    /// A pitch: octave from the high nibble, semitone from the low nibble.
    Note { octave: u8, semitone: u8 },
    /// Marker byte 251.
    Empty,
    /// Marker byte 252.
    DashNote,
    /// Marker byte 253.
    Dots,
    /// Marker bytes 254 and 255.
    Reserved,
}
164
/// One decoded STM pattern.
///
/// `parse_patterns` always fills it with `PATTERN_ROWS` rows of
/// `STM_CHANNELS` cells each, even when the file was truncated.
#[derive(Clone, Debug)]
pub struct StmPattern {
    /// `rows[row_index][channel_index]`.
    pub rows: Vec<Vec<StmCell>>,
}
170
171/// Per-instrument sample body after extraction.
172#[derive(Clone, Debug, Default)]
173pub struct StmSampleBody {
174    /// Raw signed 8-bit PCM. Empty if the instrument has no samples or
175    /// the file was truncated.
176    pub pcm: Vec<i8>,
177    pub loop_start: u16,
178    pub loop_end: u16,
179    pub volume: u8,
180    pub c3_hz: u16,
181}
182
183impl StmSampleBody {
184    /// True if this body carries a valid forward loop. STM signals
185    /// "no loop" either with `loop_end == 0xFFFF` or by having
186    /// `loop_end <= loop_start`.
187    pub fn is_looped(&self) -> bool {
188        self.loop_end != 0xFFFF && (self.loop_end as usize) > (self.loop_start as usize)
189    }
190}
191
192impl crate::mixer::SampleSource for StmSampleBody {
193    fn len(&self) -> usize {
194        self.pcm.len()
195    }
196    fn loop_start(&self) -> usize {
197        if self.is_looped() {
198            (self.loop_start as usize).min(self.pcm.len())
199        } else {
200            0
201        }
202    }
203    fn loop_end(&self) -> usize {
204        if self.is_looped() {
205            (self.loop_end as usize).min(self.pcm.len())
206        } else {
207            self.pcm.len()
208        }
209    }
210    fn loop_kind(&self) -> crate::mixer::LoopKind {
211        if self.is_looped() {
212            crate::mixer::LoopKind::Forward
213        } else {
214            crate::mixer::LoopKind::None
215        }
216    }
217    fn at(&self, idx: usize) -> f32 {
218        self.pcm.get(idx).copied().unwrap_or(0) as f32 / 128.0
219    }
220}
221
222/// Test whether a byte slice looks like an STM file. Returns `true` iff
223/// the mandatory ID byte at offset 0x1C is 0x1A and the file type byte
224/// at 0x1D is 1 or 2 (Song / Module). The 8-byte tracker-name field is
225/// informational — typical values are `"!Scream!"`, `"!Scrn123"`, etc.
226pub fn is_stm(bytes: &[u8]) -> bool {
227    if bytes.len() < HEADER_PREFIX_SIZE {
228        return false;
229    }
230    // Tracker name must be printable ASCII or spaces.
231    if !bytes[0x14..0x1C]
232        .iter()
233        .all(|&b| b.is_ascii_graphic() || b == b' ')
234    {
235        return false;
236    }
237    if bytes[0x1C] != 0x1A {
238        return false;
239    }
240    let file_type = bytes[0x1D];
241    file_type == 1 || file_type == 2
242}
243
/// Decode a fixed-width, NUL-terminated text field.
///
/// Everything from the first NUL byte onward is ignored (the whole slice
/// is used when there is none), invalid UTF-8 is replaced lossily, and
/// trailing whitespace is stripped.
fn read_cstring(bytes: &[u8]) -> String {
    let text = match bytes.iter().position(|&b| b == 0) {
        Some(nul) => &bytes[..nul],
        None => bytes,
    };
    let decoded = String::from_utf8_lossy(text);
    decoded.trim_end().to_owned()
}
250
/// Read the little-endian `u16` stored at `bytes[off]` and `bytes[off + 1]`.
///
/// Panics if either byte lies outside `bytes`.
fn read_u16_le(bytes: &[u8], off: usize) -> u16 {
    let lo = u16::from(bytes[off]);
    let hi = u16::from(bytes[off + 1]);
    (hi << 8) | lo
}
254
255/// Parse an STM file header + instrument table + order table.
256///
257/// Returns `Error::NeedMore` if the buffer is too short to reach the
258/// order table; `Error::invalid` if the mandatory ID byte is wrong.
259pub fn parse_header(bytes: &[u8]) -> Result<StmHeader> {
260    if bytes.len() < ORDER_TABLE_OFFSET + ORDER_TABLE_SIZE {
261        return Err(Error::NeedMore);
262    }
263    if bytes[0x1C] != 0x1A {
264        return Err(Error::invalid("STM: missing 0x1A id byte at offset 0x1C"));
265    }
266
267    let title = read_cstring(&bytes[0..20]);
268    let tracker_name = read_cstring(&bytes[0x14..0x1C]);
269    let file_type = StmFileType::from(bytes[0x1D]);
270    let version_major = bytes[0x1E];
271    let version_minor = bytes[0x1F];
272    let tempo = bytes[0x20];
273    let n_patterns = bytes[0x21];
274    let global_volume = bytes[0x22];
275
276    // Instrument table.
277    let mut instruments = Vec::with_capacity(INSTRUMENT_COUNT);
278    for i in 0..INSTRUMENT_COUNT {
279        let off = HEADER_PREFIX_SIZE + i * INSTRUMENT_RECORD_SIZE;
280        let rec = &bytes[off..off + INSTRUMENT_RECORD_SIZE];
281        let name = read_cstring(&rec[0..12]);
282        // rec[12] = ID, rec[13] = disk, rec[14..16] reserved
283        let disk = rec[13];
284        let length = read_u16_le(rec, 16);
285        let loop_start = read_u16_le(rec, 18);
286        let loop_end = read_u16_le(rec, 20);
287        let volume = rec[22].min(64);
288        // rec[23] reserved
289        let c3_hz = read_u16_le(rec, 24);
290        // rec[26..30] reserved
291        let paragraphs = read_u16_le(rec, 30);
292        instruments.push(StmInstrument {
293            name,
294            disk,
295            length,
296            loop_start,
297            loop_end,
298            volume,
299            c3_hz,
300            paragraphs,
301        });
302    }
303
304    let order: Vec<u8> = bytes[ORDER_TABLE_OFFSET..ORDER_TABLE_OFFSET + ORDER_TABLE_SIZE].to_vec();
305
306    Ok(StmHeader {
307        title,
308        tracker_name,
309        file_type,
310        version_major,
311        version_minor,
312        tempo,
313        n_patterns,
314        global_volume,
315        instruments,
316        order,
317    })
318}
319
320/// Parse all STM patterns from the buffer.
321///
322/// Any pattern whose data runs past end-of-file is silently truncated:
323/// unread cells default to `StmCell::default()`. This matches the
324/// robust-on-truncation policy used by the MOD parser.
325pub fn parse_patterns(header: &StmHeader, bytes: &[u8]) -> Vec<StmPattern> {
326    let mut patterns = Vec::with_capacity(header.n_patterns as usize);
327    for p in 0..header.n_patterns as usize {
328        let mut rows = Vec::with_capacity(PATTERN_ROWS);
329        for r in 0..PATTERN_ROWS {
330            let mut row = Vec::with_capacity(STM_CHANNELS);
331            for c in 0..STM_CHANNELS {
332                let off = PATTERN_DATA_OFFSET
333                    + p * BYTES_PER_PATTERN
334                    + r * STM_CHANNELS * CELL_BYTES
335                    + c * CELL_BYTES;
336                let cell = if off + CELL_BYTES <= bytes.len() {
337                    // Byte 0 is the note byte.
338                    // Byte 1: bits 0..=2 low of volume, bits 3..=7 instrument.
339                    // Byte 2: bits 0..=3 command (ProTracker-ish),
340                    //         bits 4..=6 upper bits of volume.
341                    // Byte 3: command parameter.
342                    let b0 = bytes[off];
343                    let b1 = bytes[off + 1];
344                    let b2 = bytes[off + 2];
345                    let b3 = bytes[off + 3];
346                    let instrument = (b1 >> 3) & 0x1F;
347                    let vol_lo = b1 & 0x07;
348                    let vol_hi = (b2 >> 4) & 0x07;
349                    let volume = (vol_hi << 3) | vol_lo;
350                    let command = b2 & 0x0F;
351                    StmCell {
352                        note_raw: b0,
353                        instrument,
354                        volume: volume.min(64),
355                        command,
356                        command_param: b3,
357                    }
358                } else {
359                    StmCell::default()
360                };
361                row.push(cell);
362            }
363            rows.push(row);
364        }
365        patterns.push(StmPattern { rows });
366    }
367    patterns
368}
369
370/// Absolute offset in the file where sample bodies begin.
371pub fn sample_data_offset(header: &StmHeader) -> usize {
372    PATTERN_DATA_OFFSET + header.n_patterns as usize * BYTES_PER_PATTERN
373}
374
375/// Extract all 31 instrument sample bodies.
376///
377/// Samples extending past EOF are clamped; if any sample straddles the
378/// 16-byte padding boundary mentioned in the spec we ignore the padding
379/// (sample bodies are laid out contiguously by length, per the spec).
380pub fn extract_samples(header: &StmHeader, bytes: &[u8]) -> Vec<StmSampleBody> {
381    let mut out = Vec::with_capacity(header.instruments.len());
382    let mut cursor = sample_data_offset(header);
383    let end = bytes.len();
384    for inst in &header.instruments {
385        let declared = inst.length as usize;
386        let available = end.saturating_sub(cursor);
387        let take = declared.min(available);
388        let pcm: Vec<i8> = if take == 0 {
389            Vec::new()
390        } else {
391            bytes[cursor..cursor + take]
392                .iter()
393                .map(|&b| b as i8)
394                .collect()
395        };
396        cursor += take;
397        out.push(StmSampleBody {
398            pcm,
399            loop_start: inst.loop_start,
400            loop_end: inst.loop_end,
401            volume: inst.volume,
402            c3_hz: inst.c3_hz,
403        });
404    }
405    out
406}
407
408/// Rough upper-bound duration estimate in microseconds, derived from
409/// `song_length * rows * 6 ticks / tempo` analogous to the MOD estimate.
410/// STM's tempo field is *not* ProTracker BPM — it's a per-row tick count
411/// scaled by a hardware factor. For a rough estimate we treat the tempo
412/// field like a BPM-ish value; real songs change tempo via effects so
413/// callers should treat this as a loose upper bound only.
414pub fn estimate_duration_micros(header: &StmHeader) -> i64 {
415    let orders = (header.n_patterns as i64).max(1);
416    let tempo = header.tempo.max(1) as i64;
417    // Fallback heuristic: ~125 BPM equivalent at tempo=0x60 (96).
418    let bpm_equiv = (tempo * 125 / 0x60).max(30);
419    orders.saturating_mul(64 * 6 * 1_000_000) / (bpm_equiv * 2 / 5).max(1)
420}
421
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal STM image: fixed header, instrument table, order
    /// table, `n_patterns` all-zero patterns and a 4-byte body for
    /// instrument 0.
    fn build_minimal_stm(n_patterns: u8) -> Vec<u8> {
        let mut out = vec![0u8; PATTERN_DATA_OFFSET];
        out[0..4].copy_from_slice(b"test");
        out[0x14..0x1C].copy_from_slice(b"!Scream!");
        out[0x1C] = 0x1A;
        out[0x1D] = 2; // module
        out[0x1E] = 2;
        out[0x1F] = 0;
        out[0x20] = 0x60; // tempo
        out[0x21] = n_patterns;
        out[0x22] = 64; // global volume

        // Instrument 0 at HEADER_PREFIX_SIZE (0x30).
        let inst_off = HEADER_PREFIX_SIZE;
        out[inst_off..inst_off + 4].copy_from_slice(b"bass");
        // length (bytes) at rec offset 16 → file offset inst_off + 16
        out[inst_off + 16..inst_off + 18].copy_from_slice(&4u16.to_le_bytes());
        out[inst_off + 22] = 64; // volume
        out[inst_off + 24..inst_off + 26].copy_from_slice(&8363u16.to_le_bytes()); // C3 hz

        // Order: pattern 0 at index 0, every other slot 255.
        // NOTE(review): with the current layout constants this range
        // overlaps instrument records 29 and 30, so they decode as
        // 0xFF-filled; their bodies come out empty in `extract_samples`
        // only because no bytes remain after instrument 0's body.
        out[ORDER_TABLE_OFFSET] = 0;
        out[ORDER_TABLE_OFFSET + 1..ORDER_TABLE_OFFSET + ORDER_TABLE_SIZE].fill(255);

        // Pattern block filled with zeros.
        out.resize(
            PATTERN_DATA_OFFSET + n_patterns as usize * BYTES_PER_PATTERN,
            0,
        );

        // Instrument 0 body: 4 signed bytes.
        out.extend([0x10u8, 0xF0, 0x40, 0xC0]);
        out
    }

    #[test]
    fn is_stm_accepts_minimal_file() {
        let bytes = build_minimal_stm(1);
        assert!(is_stm(&bytes));
    }

    #[test]
    fn is_stm_rejects_missing_id_byte() {
        let mut bytes = build_minimal_stm(1);
        bytes[0x1C] = 0;
        assert!(!is_stm(&bytes));
    }

    #[test]
    fn is_stm_rejects_bad_file_type() {
        let mut bytes = build_minimal_stm(1);
        bytes[0x1D] = 99;
        assert!(!is_stm(&bytes));
    }

    #[test]
    fn is_stm_rejects_unprintable_tracker_name() {
        let mut bytes = build_minimal_stm(1);
        // A NUL inside the tracker-name field is neither graphic nor a space.
        bytes[0x14] = 0;
        assert!(!is_stm(&bytes));
        // A space is explicitly allowed.
        bytes[0x14] = b' ';
        assert!(is_stm(&bytes));
    }

    #[test]
    fn is_stm_rejects_short_buffer() {
        let bytes = build_minimal_stm(1);
        assert!(!is_stm(&bytes[..HEADER_PREFIX_SIZE - 1]));
        assert!(is_stm(&bytes[..HEADER_PREFIX_SIZE]));
    }

    #[test]
    fn parse_header_populates_core_fields() {
        let bytes = build_minimal_stm(2);
        let h = parse_header(&bytes).unwrap();
        assert_eq!(h.title, "test");
        assert_eq!(h.tracker_name, "!Scream!");
        assert_eq!(h.file_type, StmFileType::Module);
        assert_eq!(h.version_major, 2);
        assert_eq!(h.version_minor, 0);
        assert_eq!(h.tempo, 0x60);
        assert_eq!(h.n_patterns, 2);
        assert_eq!(h.global_volume, 64);
        assert_eq!(h.instruments.len(), INSTRUMENT_COUNT);
        assert_eq!(h.instruments[0].name, "bass");
        assert_eq!(h.instruments[0].length, 4);
        assert_eq!(h.instruments[0].volume, 64);
        assert_eq!(h.instruments[0].c3_hz, 8363);
        assert_eq!(h.order.len(), ORDER_TABLE_SIZE);
        assert_eq!(h.order[0], 0);
        assert_eq!(h.order[1], 255);
    }

    #[test]
    fn parse_header_rejects_bad_id_byte() {
        let mut bytes = build_minimal_stm(1);
        bytes[0x1C] = 0;
        assert!(parse_header(&bytes).is_err());
    }

    #[test]
    fn parse_header_needs_full_order_table() {
        let bytes = build_minimal_stm(1);
        let order_end = ORDER_TABLE_OFFSET + ORDER_TABLE_SIZE;
        // Cut at the start of the order table and one byte short of its end.
        for cut in [ORDER_TABLE_OFFSET, order_end - 1] {
            assert!(parse_header(&bytes[..cut]).is_err());
        }
        // Reaching exactly the end of the order table is sufficient.
        assert!(parse_header(&bytes[..order_end]).is_ok());
    }

    #[test]
    fn parse_patterns_returns_empty_rows_by_default() {
        let bytes = build_minimal_stm(1);
        let h = parse_header(&bytes).unwrap();
        let pats = parse_patterns(&h, &bytes);
        assert_eq!(pats.len(), 1);
        assert_eq!(pats[0].rows.len(), PATTERN_ROWS);
        assert_eq!(pats[0].rows[0].len(), STM_CHANNELS);
        assert_eq!(pats[0].rows[0][0].note_raw, 0);
    }

    #[test]
    fn parse_patterns_decodes_cell_bit_fields() {
        let mut bytes = build_minimal_stm(1);
        // Row 0, channel 0: octave 4, semitone 0 → note_raw = 0x40 (64).
        // b1 = instrument=3 (<< 3), vol_lo=4 → 0x1C
        // b2 = vol_hi=2 (<< 4), command=5 → 0x25
        // b3 = command param 0x0A
        let cell_off = PATTERN_DATA_OFFSET;
        bytes[cell_off] = 0x40;
        bytes[cell_off + 1] = (3 << 3) | 4;
        bytes[cell_off + 2] = (2 << 4) | 5;
        bytes[cell_off + 3] = 0x0A;

        let h = parse_header(&bytes).unwrap();
        let pats = parse_patterns(&h, &bytes);
        let c = pats[0].rows[0][0];
        assert_eq!(c.note_raw, 0x40);
        assert_eq!(
            c.kind(),
            StmNoteKind::Note {
                octave: 4,
                semitone: 0
            }
        );
        assert_eq!(c.instrument, 3);
        assert_eq!(c.volume, (2 << 3) | 4);
        assert_eq!(c.command, 5);
        assert_eq!(c.command_param, 0x0A);
    }

    #[test]
    fn cell_kind_classifies_reserved_values() {
        let make = |n: u8| StmCell {
            note_raw: n,
            ..StmCell::default()
        };
        // 250 is the last value still treated as a pitch.
        assert_eq!(
            make(250).kind(),
            StmNoteKind::Note {
                octave: 15,
                semitone: 10
            }
        );
        assert_eq!(make(251).kind(), StmNoteKind::Empty);
        assert_eq!(make(252).kind(), StmNoteKind::DashNote);
        assert_eq!(make(253).kind(), StmNoteKind::Dots);
        assert_eq!(make(254).kind(), StmNoteKind::Reserved);
        assert_eq!(make(255).kind(), StmNoteKind::Reserved);
    }

    #[test]
    fn extract_samples_reads_instrument_body() {
        let bytes = build_minimal_stm(1);
        let h = parse_header(&bytes).unwrap();
        let samples = extract_samples(&h, &bytes);
        assert_eq!(samples.len(), INSTRUMENT_COUNT);
        // 0xF0 and 0xC0 reinterpret as -16 and -64.
        assert_eq!(samples[0].pcm, [0x10, -16, 0x40, -64]);
        for s in &samples[1..] {
            assert!(s.pcm.is_empty());
        }
    }

    #[test]
    fn extract_samples_handles_truncated_body() {
        let mut bytes = build_minimal_stm(1);
        // Cut the last 2 bytes of the sample body.
        bytes.truncate(bytes.len() - 2);
        let h = parse_header(&bytes).unwrap();
        let samples = extract_samples(&h, &bytes);
        assert_eq!(samples.len(), INSTRUMENT_COUNT);
        assert_eq!(samples[0].pcm, [0x10, -16]);
    }
}