atomr_agents_stt_core/
audio.rs1use std::path::PathBuf;
7
8use bytes::Bytes;
9use serde::{Deserialize, Serialize};
10
11#[derive(Debug, Clone)]
12pub enum AudioInput {
13 File(PathBuf),
17 Bytes { data: Bytes, format: AudioFormat },
19 Pcm(PcmBuffer),
22}
23
24#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
25#[serde(tag = "kind", rename_all = "snake_case")]
26pub enum AudioFormat {
27 Pcm {
28 sample_rate: u32,
29 channels: u16,
30 sample: SampleType,
31 },
32 Wav,
33 Mp3,
34 Flac,
35 Ogg,
36 Opus,
37 Webm,
38 Mp4,
39 Aac,
40 Mulaw {
42 sample_rate: u32,
43 },
44}
45
46impl AudioFormat {
47 pub fn mime(&self) -> &'static str {
49 match self {
50 AudioFormat::Pcm { .. } => "audio/wav",
51 AudioFormat::Wav => "audio/wav",
52 AudioFormat::Mp3 => "audio/mpeg",
53 AudioFormat::Flac => "audio/flac",
54 AudioFormat::Ogg => "audio/ogg",
55 AudioFormat::Opus => "audio/opus",
56 AudioFormat::Webm => "audio/webm",
57 AudioFormat::Mp4 => "audio/mp4",
58 AudioFormat::Aac => "audio/aac",
59 AudioFormat::Mulaw { .. } => "audio/basic",
60 }
61 }
62
63 pub fn extension(&self) -> &'static str {
65 match self {
66 AudioFormat::Pcm { .. } | AudioFormat::Wav => "wav",
67 AudioFormat::Mp3 => "mp3",
68 AudioFormat::Flac => "flac",
69 AudioFormat::Ogg => "ogg",
70 AudioFormat::Opus => "opus",
71 AudioFormat::Webm => "webm",
72 AudioFormat::Mp4 => "mp4",
73 AudioFormat::Aac => "aac",
74 AudioFormat::Mulaw { .. } => "raw",
75 }
76 }
77}
78
79#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
80#[serde(rename_all = "snake_case")]
81pub enum SampleType {
82 I16,
83 I32,
84 F32,
85}
86
/// PCM audio held as `f32` samples, together with the sample rate and channel count
/// needed to compute its duration.
#[derive(Debug, Clone)]
pub struct PcmBuffer {
    /// Sample values. [`PcmBuffer::duration_secs`] treats the length of this vector as
    /// the total across all channels.
    // NOTE(review): channel layout (interleaved vs. planar) is not visible here — confirm.
    pub samples: Vec<f32>,
    /// Samples per second, per channel.
    pub sample_rate: u32,
    /// Number of channels represented in `samples`.
    pub channels: u16,
}

impl PcmBuffer {
    /// Builds a buffer from its three parts; no validation is performed.
    pub fn new(samples: Vec<f32>, sample_rate: u32, channels: u16) -> Self {
        Self {
            samples,
            sample_rate,
            channels,
        }
    }

    /// Returns the buffer length in seconds: `samples.len() / (sample_rate * channels)`.
    ///
    /// Returns `0.0` when `sample_rate` or `channels` is zero, so the division is never
    /// performed with a zero denominator.
    pub fn duration_secs(&self) -> f32 {
        if self.sample_rate == 0 || self.channels == 0 {
            return 0.0;
        }

        // Do the arithmetic in f64 and narrow once at the end. f32 represents integers
        // exactly only up to 2^24, so casting the operands to f32 first rounds large
        // sample counts (and large rate * channel products) before the division.
        let total_samples = self.samples.len() as f64;
        let samples_per_second = f64::from(self.sample_rate) * f64::from(self.channels);
        (total_samples / samples_per_second) as f32
    }
}