// oximedia_mir/types.rs

//! Common types for MIR analysis.
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6/// Complete analysis result.
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct AnalysisResult {
9    /// Tempo analysis result.
10    pub tempo: Option<TempoResult>,
11
12    /// Beat tracking result.
13    pub beat: Option<BeatResult>,
14
15    /// Key detection result.
16    pub key: Option<KeyResult>,
17
18    /// Chord recognition result.
19    pub chord: Option<ChordResult>,
20
21    /// Melody extraction result.
22    pub melody: Option<MelodyResult>,
23
24    /// Structure analysis result.
25    pub structure: Option<StructureResult>,
26
27    /// Genre classification result.
28    pub genre: Option<GenreResult>,
29
30    /// Mood detection result.
31    pub mood: Option<MoodResult>,
32
33    /// Spectral features result.
34    pub spectral: Option<SpectralResult>,
35
36    /// Rhythm features result.
37    pub rhythm: Option<RhythmResult>,
38
39    /// Harmonic analysis result.
40    pub harmonic: Option<HarmonicResult>,
41
42    /// Sample rate of analyzed audio.
43    pub sample_rate: f32,
44
45    /// Duration of analyzed audio in seconds.
46    pub duration: f32,
47}
48
49/// Tempo detection result.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct TempoResult {
52    /// Detected BPM (beats per minute).
53    pub bpm: f32,
54
55    /// Confidence score (0.0 to 1.0).
56    pub confidence: f32,
57
58    /// Tempo stability (0.0 to 1.0, higher = more stable).
59    pub stability: f32,
60
61    /// Alternative tempo estimates.
62    pub alternatives: Vec<(f32, f32)>, // (BPM, confidence)
63}
64
65/// Beat tracking result.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct BeatResult {
68    /// Beat times in seconds.
69    pub beat_times: Vec<f32>,
70
71    /// Downbeat times in seconds.
72    pub downbeat_times: Vec<f32>,
73
74    /// Beat confidence scores.
75    pub beat_confidence: Vec<f32>,
76
77    /// Estimated time signature (numerator, denominator).
78    pub time_signature: Option<(u8, u8)>,
79}
80
81/// Key detection result.
82#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct KeyResult {
84    /// Detected key (e.g., "C major", "A minor").
85    pub key: String,
86
87    /// Root note (0-11, C=0).
88    pub root: u8,
89
90    /// Mode (true = major, false = minor).
91    pub is_major: bool,
92
93    /// Confidence score (0.0 to 1.0).
94    pub confidence: f32,
95
96    /// Key profile correlations.
97    pub profile_correlations: Vec<f32>,
98}
99
100/// Chord recognition result.
101#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct ChordResult {
103    /// Chord labels with timestamps.
104    pub chords: Vec<ChordLabel>,
105
106    /// Chord progression patterns.
107    pub progressions: Vec<String>,
108
109    /// Overall harmonic complexity (0.0 to 1.0).
110    pub complexity: f32,
111}
112
113/// Individual chord label.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct ChordLabel {
116    /// Start time in seconds.
117    pub start: f32,
118
119    /// End time in seconds.
120    pub end: f32,
121
122    /// Chord name (e.g., "C", "Am", "G7").
123    pub label: String,
124
125    /// Confidence score (0.0 to 1.0).
126    pub confidence: f32,
127}
128
129/// Melody extraction result.
130#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct MelodyResult {
132    /// Pitch contour over time (Hz, 0 = no pitch).
133    pub pitch_contour: Vec<f32>,
134
135    /// Time points for pitch contour.
136    pub time_points: Vec<f32>,
137
138    /// Pitch confidence scores.
139    pub confidence: Vec<f32>,
140
141    /// Melodic range (min, max in Hz).
142    pub range: (f32, f32),
143
144    /// Melodic contour complexity.
145    pub complexity: f32,
146}
147
148/// Structure analysis result.
149#[derive(Debug, Clone, Serialize, Deserialize)]
150pub struct StructureResult {
151    /// Structural segments.
152    pub segments: Vec<Segment>,
153
154    /// Self-similarity matrix (flattened).
155    pub similarity_matrix: Vec<f32>,
156
157    /// Matrix dimensions.
158    pub matrix_size: usize,
159
160    /// Overall structural complexity.
161    pub complexity: f32,
162}
163
164/// Musical segment.
165#[derive(Debug, Clone, Serialize, Deserialize)]
166pub struct Segment {
167    /// Start time in seconds.
168    pub start: f32,
169
170    /// End time in seconds.
171    pub end: f32,
172
173    /// Segment label (e.g., "intro", "verse", "chorus").
174    pub label: String,
175
176    /// Confidence score (0.0 to 1.0).
177    pub confidence: f32,
178}
179
180/// Genre classification result.
181#[derive(Debug, Clone, Serialize, Deserialize)]
182pub struct GenreResult {
183    /// Genre predictions with confidence scores.
184    pub genres: HashMap<String, f32>,
185
186    /// Top genre.
187    pub top_genre_name: String,
188
189    /// Top genre confidence.
190    pub top_genre_confidence: f32,
191}
192
193impl GenreResult {
194    /// Get top genre and confidence.
195    #[must_use]
196    pub fn top_genre(&self) -> (&str, f32) {
197        (&self.top_genre_name, self.top_genre_confidence)
198    }
199}
200
201/// Mood detection result.
202#[derive(Debug, Clone, Serialize, Deserialize)]
203pub struct MoodResult {
204    /// Valence (negative to positive, -1.0 to 1.0).
205    pub valence: f32,
206
207    /// Arousal (calm to energetic, 0.0 to 1.0).
208    pub arousal: f32,
209
210    /// Mood labels with confidence.
211    pub moods: HashMap<String, f32>,
212
213    /// Emotional intensity (0.0 to 1.0).
214    pub intensity: f32,
215}
216
217/// Spectral features result.
218#[derive(Debug, Clone, Serialize, Deserialize)]
219pub struct SpectralResult {
220    /// Spectral centroid over time (Hz).
221    pub centroid: Vec<f32>,
222
223    /// Spectral rolloff over time (Hz).
224    pub rolloff: Vec<f32>,
225
226    /// Spectral flux over time.
227    pub flux: Vec<f32>,
228
229    /// Spectral contrast over time.
230    pub contrast: Vec<Vec<f32>>,
231
232    /// Mean spectral centroid.
233    pub mean_centroid: f32,
234
235    /// Mean spectral rolloff.
236    pub mean_rolloff: f32,
237
238    /// Mean spectral flux.
239    pub mean_flux: f32,
240}
241
242/// Rhythm features result.
243#[derive(Debug, Clone, Serialize, Deserialize)]
244pub struct RhythmResult {
245    /// Onset strength envelope.
246    pub onset_strength: Vec<f32>,
247
248    /// Onset times in seconds.
249    pub onset_times: Vec<f32>,
250
251    /// Rhythmic patterns.
252    pub patterns: Vec<RhythmPattern>,
253
254    /// Rhythmic complexity (0.0 to 1.0).
255    pub complexity: f32,
256
257    /// Syncopation measure (0.0 to 1.0).
258    pub syncopation: f32,
259}
260
261/// Rhythmic pattern.
262#[derive(Debug, Clone, Serialize, Deserialize)]
263pub struct RhythmPattern {
264    /// Pattern start time in seconds.
265    pub start: f32,
266
267    /// Pattern duration in seconds.
268    pub duration: f32,
269
270    /// Pattern description.
271    pub description: String,
272
273    /// Pattern strength (0.0 to 1.0).
274    pub strength: f32,
275}
276
277/// Harmonic analysis result.
278#[derive(Debug, Clone, Serialize, Deserialize)]
279pub struct HarmonicResult {
280    /// Harmonic component energy over time.
281    pub harmonic_energy: Vec<f32>,
282
283    /// Percussive component energy over time.
284    pub percussive_energy: Vec<f32>,
285
286    /// Harmonic-to-percussive ratio.
287    pub hpr_ratio: f32,
288
289    /// Pitch class profile (12 bins, C to B).
290    pub pitch_class_profile: Vec<f32>,
291
292    /// Chroma features over time.
293    pub chroma: Vec<Vec<f32>>,
294}
295
296/// Loudness analysis result.
297#[derive(Debug, Clone, Serialize, Deserialize)]
298pub struct LoudnessResult {
299    /// Integrated loudness (LUFS approximation).
300    pub integrated_loudness: f32,
301
302    /// Loudness range (LRA).
303    pub loudness_range: f32,
304
305    /// Peak loudness.
306    pub peak_loudness: f32,
307
308    /// True peak value.
309    pub true_peak: f32,
310}
311
312bitflags::bitflags! {
313    /// Feature set flags for selective feature extraction.
314    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
315    pub struct FeatureSet: u32 {
316        /// Spectral features.
317        const SPECTRAL = 0b0000_0001;
318        /// Rhythm features.
319        const RHYTHM = 0b0000_0010;
320        /// Harmonic features.
321        const HARMONIC = 0b0000_0100;
322        /// Tempo and beat.
323        const TEMPO = 0b0000_1000;
324        /// Key detection.
325        const KEY = 0b0001_0000;
326        /// Chord recognition.
327        const CHORD = 0b0010_0000;
328        /// Melody extraction.
329        const MELODY = 0b0100_0000;
330        /// All features.
331        const ALL = 0b0111_1111;
332    }
333}
334
335impl Default for FeatureSet {
336    fn default() -> Self {
337        Self::ALL
338    }
339}