use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::fmt;
use std::hash::{Hash, Hasher};
/// Category of an audio source.
///
/// Serialized by serde with snake_case variant names (`input`, `system`, `mixed`).
/// `Input` is the `Default` value.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AudioSourceType {
/// Default variant. Presumably a capture device such as a microphone — confirm with the capture backend.
#[default]
Input,
/// Presumably system/loopback audio — confirm with the capture backend.
System,
/// Presumably a combination of input and system audio — confirm with the capture backend.
Mixed,
}
/// Recording mode selector.
///
/// Serialized by serde with snake_case variant names (`mixed`, `echo_cancel`).
/// `Mixed` is the `Default` value.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RecordingMode {
/// Default variant.
#[default]
Mixed,
/// NOTE(review): presumably applies echo cancellation between two sources — confirm with the engine.
EchoCancel,
}
/// Description of an audio device: an identifier, a name, and its source category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AudioDevice {
/// Device identifier (format is not defined in this file).
pub id: String,
/// Device name.
pub name: String,
/// Source category; falls back to `AudioSourceType::default()` when the field is
/// absent during deserialization.
#[serde(default)]
pub source_type: AudioSourceType,
}
/// Buffer of audio samples together with its channel count and sample rate.
///
/// Derives `Debug` and `Clone` so it can be logged and duplicated like the other
/// audio payload types in this module.
///
/// NOTE(review): whether `samples` is interleaved across `channels` is not visible
/// from this file — confirm with the code that produces these buffers.
#[derive(Debug, Clone)]
pub struct AudioData {
    /// Sample values as 32-bit floats.
    pub samples: Vec<f32>,
    /// Number of channels described by this buffer.
    pub channels: u16,
    /// Sample rate of `samples` (presumably in Hz — confirm).
    pub sample_rate: u32,
}
/// A chunk of streamed audio samples with its position in the stream.
///
/// The tests in this file treat `sample_offset / sample_rate` as the chunk's
/// timestamp in seconds, and advance the offset by `samples.len()` per chunk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamingAudioData {
/// Sample values as 32-bit floats.
pub samples: Vec<f32>,
/// Sample rate of `samples`.
pub sample_rate: u32,
/// Index of the first sample of this chunk, counted in samples.
pub sample_offset: u64,
}
/// Keyboard keys that can take part in a hotkey.
///
/// Serialized by serde with snake_case variant names. `RightShift` is the
/// `Default` value. The type is `Copy` and `Hash`, so it can be stored in a `HashSet`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum KeyCode {
// Modifier keys, Caps Lock and the meta keys.
RightAlt,
LeftAlt,
RightControl,
LeftControl,
/// Default key.
#[default]
RightShift,
LeftShift,
CapsLock,
LeftMeta,
RightMeta,
// Function keys.
F1,
F2,
F3,
F4,
F5,
F6,
F7,
F8,
F9,
F10,
F11,
F12,
F13,
F14,
F15,
F16,
F17,
F18,
F19,
F20,
F21,
F22,
F23,
F24,
// Letter keys.
KeyA,
KeyB,
KeyC,
KeyD,
KeyE,
KeyF,
KeyG,
KeyH,
KeyI,
KeyJ,
KeyK,
KeyL,
KeyM,
KeyN,
KeyO,
KeyP,
KeyQ,
KeyR,
KeyS,
KeyT,
KeyU,
KeyV,
KeyW,
KeyX,
KeyY,
KeyZ,
// Digit row.
Digit0,
Digit1,
Digit2,
Digit3,
Digit4,
Digit5,
Digit6,
Digit7,
Digit8,
Digit9,
// Navigation keys.
ArrowUp,
ArrowDown,
ArrowLeft,
ArrowRight,
Home,
End,
PageUp,
PageDown,
Insert,
Delete,
// Editing and system keys.
Escape,
Tab,
Space,
Enter,
Backspace,
PrintScreen,
ScrollLock,
Pause,
// Punctuation keys.
Minus,
Equal,
BracketLeft,
BracketRight,
Backslash,
Semicolon,
Quote,
Backquote,
Comma,
Period,
Slash,
// Numeric keypad.
Numpad0,
Numpad1,
Numpad2,
Numpad3,
Numpad4,
Numpad5,
Numpad6,
Numpad7,
Numpad8,
Numpad9,
NumpadMultiply,
NumpadAdd,
NumpadSubtract,
NumpadDecimal,
NumpadDivide,
NumLock,
}
impl KeyCode {
pub fn display_name(&self) -> &'static str {
match self {
KeyCode::RightAlt => "Right Alt",
KeyCode::LeftAlt => "Left Alt",
KeyCode::RightControl => "Right Ctrl",
KeyCode::LeftControl => "Left Ctrl",
KeyCode::RightShift => "Right Shift",
KeyCode::LeftShift => "Left Shift",
KeyCode::CapsLock => "Caps Lock",
KeyCode::LeftMeta => "Left Win",
KeyCode::RightMeta => "Right Win",
KeyCode::F1 => "F1",
KeyCode::F2 => "F2",
KeyCode::F3 => "F3",
KeyCode::F4 => "F4",
KeyCode::F5 => "F5",
KeyCode::F6 => "F6",
KeyCode::F7 => "F7",
KeyCode::F8 => "F8",
KeyCode::F9 => "F9",
KeyCode::F10 => "F10",
KeyCode::F11 => "F11",
KeyCode::F12 => "F12",
KeyCode::F13 => "F13",
KeyCode::F14 => "F14",
KeyCode::F15 => "F15",
KeyCode::F16 => "F16",
KeyCode::F17 => "F17",
KeyCode::F18 => "F18",
KeyCode::F19 => "F19",
KeyCode::F20 => "F20",
KeyCode::F21 => "F21",
KeyCode::F22 => "F22",
KeyCode::F23 => "F23",
KeyCode::F24 => "F24",
KeyCode::KeyA => "A",
KeyCode::KeyB => "B",
KeyCode::KeyC => "C",
KeyCode::KeyD => "D",
KeyCode::KeyE => "E",
KeyCode::KeyF => "F",
KeyCode::KeyG => "G",
KeyCode::KeyH => "H",
KeyCode::KeyI => "I",
KeyCode::KeyJ => "J",
KeyCode::KeyK => "K",
KeyCode::KeyL => "L",
KeyCode::KeyM => "M",
KeyCode::KeyN => "N",
KeyCode::KeyO => "O",
KeyCode::KeyP => "P",
KeyCode::KeyQ => "Q",
KeyCode::KeyR => "R",
KeyCode::KeyS => "S",
KeyCode::KeyT => "T",
KeyCode::KeyU => "U",
KeyCode::KeyV => "V",
KeyCode::KeyW => "W",
KeyCode::KeyX => "X",
KeyCode::KeyY => "Y",
KeyCode::KeyZ => "Z",
KeyCode::Digit0 => "0",
KeyCode::Digit1 => "1",
KeyCode::Digit2 => "2",
KeyCode::Digit3 => "3",
KeyCode::Digit4 => "4",
KeyCode::Digit5 => "5",
KeyCode::Digit6 => "6",
KeyCode::Digit7 => "7",
KeyCode::Digit8 => "8",
KeyCode::Digit9 => "9",
KeyCode::ArrowUp => "Up",
KeyCode::ArrowDown => "Down",
KeyCode::ArrowLeft => "Left",
KeyCode::ArrowRight => "Right",
KeyCode::Home => "Home",
KeyCode::End => "End",
KeyCode::PageUp => "Page Up",
KeyCode::PageDown => "Page Down",
KeyCode::Insert => "Insert",
KeyCode::Delete => "Delete",
KeyCode::Escape => "Esc",
KeyCode::Tab => "Tab",
KeyCode::Space => "Space",
KeyCode::Enter => "Enter",
KeyCode::Backspace => "Backspace",
KeyCode::PrintScreen => "Print Screen",
KeyCode::ScrollLock => "Scroll Lock",
KeyCode::Pause => "Pause",
KeyCode::Minus => "-",
KeyCode::Equal => "=",
KeyCode::BracketLeft => "[",
KeyCode::BracketRight => "]",
KeyCode::Backslash => "\\",
KeyCode::Semicolon => ";",
KeyCode::Quote => "'",
KeyCode::Backquote => "`",
KeyCode::Comma => ",",
KeyCode::Period => ".",
KeyCode::Slash => "/",
KeyCode::Numpad0 => "Num 0",
KeyCode::Numpad1 => "Num 1",
KeyCode::Numpad2 => "Num 2",
KeyCode::Numpad3 => "Num 3",
KeyCode::Numpad4 => "Num 4",
KeyCode::Numpad5 => "Num 5",
KeyCode::Numpad6 => "Num 6",
KeyCode::Numpad7 => "Num 7",
KeyCode::Numpad8 => "Num 8",
KeyCode::Numpad9 => "Num 9",
KeyCode::NumpadMultiply => "Num *",
KeyCode::NumpadAdd => "Num +",
KeyCode::NumpadSubtract => "Num -",
KeyCode::NumpadDecimal => "Num .",
KeyCode::NumpadDivide => "Num /",
KeyCode::NumLock => "Num Lock",
}
}
pub fn is_modifier(&self) -> bool {
matches!(
self,
KeyCode::LeftControl
| KeyCode::RightControl
| KeyCode::LeftAlt
| KeyCode::RightAlt
| KeyCode::LeftShift
| KeyCode::RightShift
| KeyCode::LeftMeta
| KeyCode::RightMeta
)
}
}
/// A set of keys that together form a hotkey.
///
/// Equality and hashing are implemented by hand further down in this file so that
/// the order of `keys` does not matter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HotkeyCombination {
/// Keys in the combination. `HotkeyCombination::new` stores them de-duplicated
/// and sorted; because the field is public (and deserializable) other orders
/// and duplicates are possible.
pub keys: Vec<KeyCode>,
}
impl HotkeyCombination {
    /// Builds a combination from `keys`.
    ///
    /// Duplicate keys are dropped and the remaining keys are stored in a canonical
    /// order: sorted by the `Debug` name of each variant.
    pub fn new(keys: Vec<KeyCode>) -> Self {
        let mut unique: Vec<KeyCode> = keys
            .into_iter()
            .collect::<HashSet<_>>()
            .into_iter()
            .collect();
        // The sort key is a freshly allocated `String`; `sort_by_cached_key`
        // computes it once per element instead of once per comparison.
        unique.sort_by_cached_key(|k| format!("{:?}", k));
        Self { keys: unique }
    }

    /// Builds a combination consisting of exactly one key.
    pub fn single(key: KeyCode) -> Self {
        Self { keys: vec![key] }
    }

    /// Renders the combination as display names joined by `" + "`.
    ///
    /// Modifier keys come first, followed by all other keys; each group is sorted
    /// by the `Debug` name of the variant. An empty combination renders as an
    /// empty string.
    pub fn display(&self) -> String {
        let (mut modifiers, mut others): (Vec<KeyCode>, Vec<KeyCode>) = self
            .keys
            .iter()
            .copied()
            .partition(|k| k.is_modifier());
        // Cached keys avoid re-formatting the `Debug` name on every comparison.
        modifiers.sort_by_cached_key(|k| format!("{:?}", k));
        others.sort_by_cached_key(|k| format!("{:?}", k));
        let labels: Vec<&str> = modifiers
            .iter()
            .chain(others.iter())
            .map(|k| k.display_name())
            .collect();
        labels.join(" + ")
    }

    /// Returns `true` when every key of this combination is contained in `pressed`.
    ///
    /// Note that an empty combination is a subset of any set, so this returns
    /// `true` for it.
    pub fn is_subset_of(&self, pressed: &HashSet<KeyCode>) -> bool {
        self.keys.iter().all(|k| pressed.contains(k))
    }
}
impl PartialEq for HotkeyCombination {
fn eq(&self, other: &Self) -> bool {
let a: HashSet<_> = self.keys.iter().collect();
let b: HashSet<_> = other.keys.iter().collect();
a == b
}
}
// Marker impl: the `PartialEq` impl for `HotkeyCombination` compares key sets,
// which is a full equivalence relation, so `Eq` holds.
impl Eq for HotkeyCombination {}
impl Hash for HotkeyCombination {
fn hash<H: Hasher>(&self, state: &mut H) {
let mut sorted: Vec<_> = self.keys.clone();
sorted.sort_by_key(|k| format!("{:?}", k));
sorted.hash(state);
}
}
impl fmt::Display for HotkeyCombination {
    /// Writes the same text that `HotkeyCombination::display` returns.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.display();
        f.write_str(&rendered)
    }
}
impl Default for HotkeyCombination {
    /// The default hotkey is the single default key (`KeyCode::default()`).
    fn default() -> Self {
        let key = KeyCode::default();
        Self { keys: vec![key] }
    }
}
/// One column of a spectrogram, stored as raw bytes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpectrogramColumn {
/// Colour bytes for this column.
/// NOTE(review): the pixel layout (e.g. RGB vs RGBA, bytes per bin) is not visible here — confirm with the renderer.
pub colors: Vec<u8>,
}
/// Data for one visualization update: waveform samples plus optional spectrogram
/// columns and speech metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualizationData {
    /// Waveform values.
    pub waveform: Vec<f32>,
    /// Spectrogram columns. Omitted from the serialized form when empty.
    ///
    /// `default` is required alongside `skip_serializing_if`: without it a value
    /// serialized with an empty spectrogram could not be deserialized again,
    /// because the field would be reported as missing.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub spectrogram: Vec<SpectrogramColumn>,
    /// Speech metrics, when available. Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub speech_metrics: Option<SpeechMetrics>,
    /// Sample rate associated with this data (presumably in Hz — confirm).
    pub sample_rate: u32,
    /// Interval between frames, in milliseconds (per the field name).
    pub frame_interval_ms: f32,
}
/// Per-frame speech-detection measurements and flags.
///
/// NOTE(review): the exact meaning and thresholds behind each flag are defined by
/// the detector, which is not part of this file — the field notes below follow the
/// field names only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpeechMetrics {
/// Amplitude in decibels (per the field name).
pub amplitude_db: f32,
/// Presumably the zero-crossing rate — confirm with the detector.
pub zcr: f32,
/// Presumably the spectral centroid in Hz — confirm with the detector.
pub centroid_hz: f32,
/// Whether speech is currently detected.
pub is_speaking: bool,
/// Flag for a pending voiced onset.
pub voiced_onset_pending: bool,
/// Flag for a pending whisper onset.
pub whisper_onset_pending: bool,
/// Flag marking the frame as a transient.
pub is_transient: bool,
/// Flag marking the frame as lookback speech.
pub is_lookback_speech: bool,
/// Flag marking the frame as a word break.
pub is_word_break: bool,
}
/// Selectable Whisper model variants.
///
/// Serialized by serde with snake_case variant names. `BaseEn` is the `Default`
/// value. Variants are declared in the same order that
/// `WhisperModel::all_in_size_order` returns them.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WhisperModel {
// The `*En` variants map to the `.en` model files (see `slug`); presumably the
// English-only models — confirm against the upstream model list.
TinyEn,
Tiny,
/// Default model.
#[default]
BaseEn,
Base,
SmallEn,
Small,
MediumEn,
Medium,
LargeV3Turbo,
LargeV3,
}
impl WhisperModel {
pub fn config_key(self) -> &'static str {
match self {
WhisperModel::TinyEn => "tiny_en",
WhisperModel::Tiny => "tiny",
WhisperModel::BaseEn => "base_en",
WhisperModel::Base => "base",
WhisperModel::SmallEn => "small_en",
WhisperModel::Small => "small",
WhisperModel::MediumEn => "medium_en",
WhisperModel::Medium => "medium",
WhisperModel::LargeV3Turbo => "large_v3_turbo",
WhisperModel::LargeV3 => "large_v3",
}
}
pub fn parse_identifier(value: &str) -> Option<Self> {
let normalized = value.trim().trim_matches('"');
let normalized = normalized.strip_prefix("ggml-").unwrap_or(normalized);
let normalized = normalized.strip_suffix(".bin").unwrap_or(normalized);
let normalized = normalized.to_ascii_lowercase();
match normalized.as_str() {
"tiny_en" | "tiny.en" => Some(WhisperModel::TinyEn),
"tiny" => Some(WhisperModel::Tiny),
"base_en" | "base.en" => Some(WhisperModel::BaseEn),
"base" => Some(WhisperModel::Base),
"small_en" | "small.en" => Some(WhisperModel::SmallEn),
"small" => Some(WhisperModel::Small),
"medium_en" | "medium.en" => Some(WhisperModel::MediumEn),
"medium" => Some(WhisperModel::Medium),
"large_v3_turbo" | "large-v3-turbo" => Some(WhisperModel::LargeV3Turbo),
"large_v3" | "large-v3" => Some(WhisperModel::LargeV3),
_ => None,
}
}
pub fn slug(self) -> &'static str {
match self {
WhisperModel::TinyEn => "tiny.en",
WhisperModel::Tiny => "tiny",
WhisperModel::BaseEn => "base.en",
WhisperModel::Base => "base",
WhisperModel::SmallEn => "small.en",
WhisperModel::Small => "small",
WhisperModel::MediumEn => "medium.en",
WhisperModel::Medium => "medium",
WhisperModel::LargeV3Turbo => "large-v3-turbo",
WhisperModel::LargeV3 => "large-v3",
}
}
pub fn download_url(self) -> String {
format!(
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-{}.bin",
self.slug()
)
}
pub fn size_mb(self) -> u32 {
match self {
WhisperModel::TinyEn => 75,
WhisperModel::Tiny => 75,
WhisperModel::BaseEn => 142,
WhisperModel::Base => 142,
WhisperModel::SmallEn => 466,
WhisperModel::Small => 466,
WhisperModel::MediumEn => 1536,
WhisperModel::Medium => 1536,
WhisperModel::LargeV3Turbo => 1536,
WhisperModel::LargeV3 => 2970,
}
}
pub fn display_name(self) -> &'static str {
match self {
WhisperModel::TinyEn => "Tiny En",
WhisperModel::Tiny => "Tiny",
WhisperModel::BaseEn => "Base En",
WhisperModel::Base => "Base",
WhisperModel::SmallEn => "Small En",
WhisperModel::Small => "Small",
WhisperModel::MediumEn => "Medium En",
WhisperModel::Medium => "Medium",
WhisperModel::LargeV3Turbo => "Large V3 Turbo",
WhisperModel::LargeV3 => "Large V3",
}
}
pub fn all_in_size_order() -> &'static [WhisperModel] {
&[
WhisperModel::TinyEn,
WhisperModel::Tiny,
WhisperModel::BaseEn,
WhisperModel::Base,
WhisperModel::SmallEn,
WhisperModel::Small,
WhisperModel::MediumEn,
WhisperModel::Medium,
WhisperModel::LargeV3Turbo,
WhisperModel::LargeV3,
]
}
}
/// Transcription profile selector.
///
/// Serialized by serde with snake_case variant names (`dictation`,
/// `transcription`, `custom`). `Dictation` is the `Default` value.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TranscriptionProfile {
/// Default profile.
#[default]
Dictation,
Transcription,
Custom,
}
/// One transcribed segment with its timing information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TranscriptionSegment {
/// Segment identifier.
pub id: String,
/// Transcribed text.
pub text: String,
/// Offset of the segment in milliseconds (the reference point is not defined in this file).
pub timestamp_offset_ms: u64,
/// Duration of the segment in milliseconds.
pub duration_ms: u64,
/// Optional path to the segment's audio; omitted from the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub audio_path: Option<String>,
}
/// Result of a transcription. Only `text` is mandatory; every optional field is
/// omitted from the serialized form when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TranscriptionResult {
/// Optional identifier of the result.
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
/// Transcribed text.
pub text: String,
/// Optional timestamp string (format is not defined in this file).
#[serde(skip_serializing_if = "Option::is_none")]
pub timestamp: Option<String>,
/// Optional duration in milliseconds.
#[serde(skip_serializing_if = "Option::is_none")]
pub duration_ms: Option<u64>,
/// Optional path to the associated audio.
#[serde(skip_serializing_if = "Option::is_none")]
pub audio_path: Option<String>,
/// Optional offset in milliseconds; defaults to `None` when absent on input.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub timestamp_offset_ms: Option<u64>,
}
/// A stored transcription history record.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HistoryEntry {
/// Entry identifier.
pub id: String,
/// Transcribed text.
pub text: String,
/// Timestamp string (format is not defined in this file).
pub timestamp: String,
/// Optional path to a WAV file for this entry; omitted from the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub wav_path: Option<String>,
}
/// Availability of a model together with its path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelStatus {
/// Whether the model is available.
pub available: bool,
/// Path of the model (presumably on the local filesystem — confirm with the caller).
pub path: String,
}
/// GPU backend availability report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuStatus {
/// Whether CUDA is available.
pub cuda_available: bool,
/// Whether Metal is available.
pub metal_available: bool,
/// Free-form system information string.
pub system_info: String,
}
/// Snapshot of the engine state.
///
/// `Default` yields all flags `false`, a queue depth of `0`, and every optional
/// field `None`. Optional fields are omitted from the serialized form when `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EngineStatus {
/// Whether capture is active.
pub capturing: bool,
/// Whether speech is currently in progress.
pub in_speech: bool,
/// Number of queued items (what is queued is not defined in this file).
pub queue_depth: usize,
/// Optional error message.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Optional identifier of the first source.
#[serde(skip_serializing_if = "Option::is_none")]
pub source1_id: Option<String>,
/// Optional identifier of the second source.
#[serde(skip_serializing_if = "Option::is_none")]
pub source2_id: Option<String>,
}
/// Engine event payloads.
///
/// Serialized as an internally tagged enum: the snake_case variant name is stored
/// under the `event` key next to the variant's own fields (the tests in this file
/// check for `"event":"audio_data"` and `"event":"raw_audio_data"`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "event", rename_all = "snake_case")]
pub enum EngineEvent {
/// Carries a `VisualizationData` payload.
VisualizationData(VisualizationData),
/// Carries a `TranscriptionResult` payload.
TranscriptionComplete(TranscriptionResult),
SpeechStarted,
SpeechEnded {
/// Duration in milliseconds.
duration_ms: u64,
},
CaptureStateChanged {
/// New capture state.
capturing: bool,
/// Optional error message; omitted from the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
error: Option<String>,
},
ModelDownloadProgress {
/// Progress as a percentage (presumably 0-100 — confirm with the downloader).
percent: u8,
},
ModelDownloadComplete {
/// Whether the download succeeded.
success: bool,
},
AudioLevelUpdate {
/// Identifier of the device the level belongs to.
device_id: String,
/// Level in decibels (per the field name).
level_db: f32,
},
/// Carries a `TranscriptionSegment` payload.
TranscriptionSegment(TranscriptionSegment),
RecordingStarted,
RecordingStopped {
/// Duration in milliseconds.
duration_ms: u64,
},
PlaybackComplete,
// NOTE(review): serde's internally tagged representation (`tag = "event"`) only
// supports newtype variants whose payload serializes as a struct or map. A bare
// `f32` payload is expected to fail at serialization time — confirm, and consider
// a struct variant such as `AgcGainChanged { gain: f32 }` if this event is ever
// serialized.
AgcGainChanged(f32),
/// Carries a `StreamingAudioData` chunk.
AudioData(StreamingAudioData),
/// Carries a `StreamingAudioData` chunk (how it differs from `AudioData` is not defined in this file).
RawAudioData(StreamingAudioData),
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample rate used by every fixture in this module.
    const RATE: u32 = 48000;

    /// Builds a `StreamingAudioData` fixture at `RATE`.
    fn chunk(samples: Vec<f32>, sample_offset: u64) -> StreamingAudioData {
        StreamingAudioData {
            samples,
            sample_rate: RATE,
            sample_offset,
        }
    }

    /// Converts a chunk's sample offset into seconds.
    fn offset_seconds(data: &StreamingAudioData) -> f64 {
        data.sample_offset as f64 / data.sample_rate as f64
    }

    /// Fields hold exactly what they were constructed with.
    #[test]
    fn streaming_audio_data_fields() {
        let data = chunk(vec![0.1, -0.5, 1.0], 96000);
        assert_eq!(data.samples.len(), 3);
        assert_eq!(data.sample_rate, 48000);
        assert_eq!(data.sample_offset, 96000);
    }

    /// A clone carries the same field values as its source.
    #[test]
    fn streaming_audio_data_clone() {
        let original = chunk(vec![0.25, -0.75], 0);
        let copy = original.clone();
        assert_eq!(copy.samples, original.samples);
        assert_eq!(copy.sample_rate, original.sample_rate);
        assert_eq!(copy.sample_offset, original.sample_offset);
    }

    /// JSON serialization followed by deserialization preserves every field.
    #[test]
    fn streaming_audio_data_serialization_round_trip() {
        let original = chunk(vec![0.0, 0.5, -0.5, 1.0, -1.0], 480000);
        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded: StreamingAudioData = serde_json::from_str(&encoded).expect("deserialize");
        assert_eq!(decoded.samples, original.samples);
        assert_eq!(decoded.sample_rate, original.sample_rate);
        assert_eq!(decoded.sample_offset, original.sample_offset);
    }

    /// The `AudioData` variant exposes its payload through pattern matching.
    #[test]
    fn engine_event_audio_data_variant_matches() {
        let event = EngineEvent::AudioData(chunk(vec![0.1], 0));
        if let EngineEvent::AudioData(data) = &event {
            assert_eq!(data.samples, vec![0.1]);
            assert_eq!(data.sample_rate, 48000);
            assert_eq!(data.sample_offset, 0);
        } else {
            panic!("expected AudioData variant");
        }
    }

    /// The `RawAudioData` variant exposes its payload through pattern matching.
    #[test]
    fn engine_event_raw_audio_data_variant_matches() {
        let event = EngineEvent::RawAudioData(chunk(vec![-0.3, 0.7], 960));
        if let EngineEvent::RawAudioData(data) = &event {
            assert_eq!(data.samples, vec![-0.3, 0.7]);
            assert_eq!(data.sample_rate, 48000);
            assert_eq!(data.sample_offset, 960);
        } else {
            panic!("expected RawAudioData variant");
        }
    }

    /// `AudioData` serializes with the `audio_data` tag and flattened payload fields.
    #[test]
    fn engine_event_audio_data_serialization() {
        let event = EngineEvent::AudioData(chunk(vec![0.5], 0));
        let encoded = serde_json::to_string(&event).expect("serialize");
        for needle in [
            "\"event\":\"audio_data\"",
            "\"sample_rate\":48000",
            "\"sample_offset\":0",
        ] {
            assert!(encoded.contains(needle));
        }
    }

    /// `RawAudioData` serializes with the `raw_audio_data` tag and flattened payload fields.
    #[test]
    fn engine_event_raw_audio_data_serialization() {
        let event = EngineEvent::RawAudioData(chunk(vec![-0.25], 480));
        let encoded = serde_json::to_string(&event).expect("serialize");
        for needle in [
            "\"event\":\"raw_audio_data\"",
            "\"sample_rate\":48000",
            "\"sample_offset\":480",
        ] {
            assert!(encoded.contains(needle));
        }
    }

    /// An offset of 480000 samples at 48 kHz corresponds to ten seconds.
    #[test]
    fn streaming_audio_data_timestamp_computation() {
        let data = chunk(vec![0.0; 480], 480000);
        let timestamp_seconds = offset_seconds(&data);
        assert!(
            (timestamp_seconds - 10.0).abs() < 1e-9,
            "expected 10.0s, got {}",
            timestamp_seconds
        );
    }

    /// The first chunk (offset zero) starts at time zero.
    #[test]
    fn streaming_audio_data_first_chunk_offset_zero() {
        let data = chunk(vec![0.0; 480], 0);
        assert_eq!(offset_seconds(&data), 0.0);
    }

    /// Consecutive chunks advance the offset by the number of samples in each chunk.
    #[test]
    fn streaming_audio_data_offset_increment() {
        let first = chunk(vec![0.0; 480], 0);
        let second_offset = first.sample_offset + first.samples.len() as u64;
        let second = chunk(vec![0.0; 480], second_offset);
        assert_eq!(second.sample_offset, 480);
        let third_offset = second.sample_offset + second.samples.len() as u64;
        assert_eq!(third_offset, 960);
    }
}