use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use super::personality::VoicePersonality;
/// Top-level voice configuration, grouping one sub-config per concern.
///
/// Every field is marked `#[serde(default)]`, so any key missing from the
/// input deserializes to that field's `Default` value.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VoiceConfig {
    /// Top-level enable flag; `false` when the key is omitted.
    #[serde(default)]
    pub enabled: bool,
    /// Audio settings (see [`AudioConfig`]).
    #[serde(default)]
    pub audio: AudioConfig,
    /// STT settings (see [`SttConfig`]).
    #[serde(default)]
    pub stt: SttConfig,
    /// TTS settings (see [`TtsConfig`]).
    #[serde(default)]
    pub tts: TtsConfig,
    /// VAD settings (see [`VadConfig`]).
    #[serde(default)]
    pub vad: VadConfig,
    /// Wake settings (see [`WakeConfig`]).
    #[serde(default)]
    pub wake: WakeConfig,
    /// Cloud fallback settings; the key `cloudFallback` is accepted as well
    /// as `cloud_fallback`.
    #[serde(default, alias = "cloudFallback")]
    pub cloud_fallback: CloudFallbackConfig,
    /// Map of [`VoicePersonality`] entries keyed by string; empty when the
    /// key is omitted.
    // No alias here: an alias equal to the field's own name accepts no
    // additional key, so it would be a no-op.
    #[serde(default)]
    pub personalities: HashMap<String, VoicePersonality>,
}
/// Audio settings.
///
/// Each field has a serde default, so a partially specified (or empty)
/// input still deserializes. Multi-word fields also accept the camelCase
/// key given in their `alias`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AudioConfig {
    /// Sample rate; falls back to `default_sample_rate()` when omitted.
    /// Also accepted under the key `sampleRate`.
    #[serde(default = "default_sample_rate", alias = "sampleRate")]
    pub sample_rate: u32,
    /// Chunk size; falls back to `default_chunk_size()` when omitted.
    /// Also accepted under the key `chunkSize`.
    #[serde(default = "default_chunk_size", alias = "chunkSize")]
    pub chunk_size: u32,
    /// Channel count; falls back to `default_audio_channels()` when omitted.
    #[serde(default = "default_audio_channels")]
    pub channels: u16,
    /// Optional input device; `None` when omitted.
    /// Also accepted under the key `inputDevice`.
    #[serde(default, alias = "inputDevice")]
    pub input_device: Option<String>,
    /// Optional output device; `None` when omitted.
    /// Also accepted under the key `outputDevice`.
    #[serde(default, alias = "outputDevice")]
    pub output_device: Option<String>,
}
/// Fallback for `AudioConfig::sample_rate`, used both by serde when the key
/// is missing and by `AudioConfig::default`.
fn default_sample_rate() -> u32 {
    const SAMPLE_RATE: u32 = 16_000;
    SAMPLE_RATE
}
/// Fallback for `AudioConfig::chunk_size`, used both by serde when the key
/// is missing and by `AudioConfig::default`.
fn default_chunk_size() -> u32 {
    const CHUNK_SIZE: u32 = 512;
    CHUNK_SIZE
}
/// Fallback for `AudioConfig::channels`, used both by serde when the key is
/// missing and by `AudioConfig::default`.
fn default_audio_channels() -> u16 {
    const CHANNELS: u16 = 1;
    CHANNELS
}
/// Builds an `AudioConfig` from the same helper functions serde uses for
/// missing keys, with both device fields left unset.
impl Default for AudioConfig {
    fn default() -> Self {
        let sample_rate = default_sample_rate();
        let chunk_size = default_chunk_size();
        let channels = default_audio_channels();
        Self {
            sample_rate,
            chunk_size,
            channels,
            input_device: None,
            output_device: None,
        }
    }
}
/// STT settings.
///
/// Each field has a serde default, so a partially specified (or empty)
/// input still deserializes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SttConfig {
    /// Enable flag; when omitted, serde calls `super::default_true`
    /// (defined in the parent module, not visible here).
    #[serde(default = "super::default_true")]
    pub enabled: bool,
    /// Model identifier; falls back to `default_stt_model()` when omitted.
    #[serde(default = "default_stt_model")]
    pub model: String,
    /// Language string; empty when omitted.
    #[serde(default)]
    pub language: String,
}
/// Fallback for `SttConfig::model`, used both by serde when the key is
/// missing and by `SttConfig::default`.
fn default_stt_model() -> String {
    String::from("sherpa-onnx-streaming-zipformer-en-20M")
}
/// Builds an `SttConfig` that is enabled, uses the fallback model, and has
/// an empty language string.
impl Default for SttConfig {
    fn default() -> Self {
        let model = default_stt_model();
        Self {
            // NOTE(review): serde fills this field via `super::default_true`;
            // confirm that helper also yields `true` so both paths agree.
            enabled: true,
            model,
            language: String::default(),
        }
    }
}
/// TTS settings.
///
/// Each field has a serde default, so a partially specified (or empty)
/// input still deserializes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TtsConfig {
    /// Enable flag; when omitted, serde calls `super::default_true`
    /// (defined in the parent module, not visible here).
    #[serde(default = "super::default_true")]
    pub enabled: bool,
    /// Provider name; falls back to `default_tts_provider()` when omitted.
    #[serde(default = "default_tts_provider")]
    pub provider: String,
    /// Model identifier; falls back to `default_tts_model()` when omitted.
    #[serde(default = "default_tts_model")]
    pub model: String,
    /// Voice string; empty when omitted.
    #[serde(default)]
    pub voice: String,
    /// Speed value; falls back to `default_speed()` when omitted.
    #[serde(default = "default_speed")]
    pub speed: f32,
}
/// Fallback for `TtsConfig::provider`, used both by serde when the key is
/// missing and by `TtsConfig::default`.
fn default_tts_provider() -> String {
    String::from("browser")
}
/// Fallback for `TtsConfig::model`, used both by serde when the key is
/// missing and by `TtsConfig::default`.
fn default_tts_model() -> String {
    String::from("vits-piper-en_US-amy-medium")
}
/// Fallback for `TtsConfig::speed`, used both by serde when the key is
/// missing and by `TtsConfig::default`.
fn default_speed() -> f32 {
    const SPEED: f32 = 1.0;
    SPEED
}
/// Builds a `TtsConfig` that is enabled, uses the fallback provider, model
/// and speed, and has an empty voice string.
impl Default for TtsConfig {
    fn default() -> Self {
        let provider = default_tts_provider();
        let model = default_tts_model();
        let speed = default_speed();
        Self {
            // NOTE(review): serde fills this field via `super::default_true`;
            // confirm that helper also yields `true` so both paths agree.
            enabled: true,
            provider,
            model,
            voice: String::default(),
            speed,
        }
    }
}
/// VAD settings.
///
/// Each field has a serde default, so a partially specified (or empty)
/// input still deserializes. Multi-word fields also accept the camelCase
/// key given in their `alias`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VadConfig {
    /// Threshold value; falls back to `default_vad_threshold()` when omitted.
    #[serde(default = "default_vad_threshold")]
    pub threshold: f32,
    /// Silence timeout (milliseconds, going by the field name); falls back
    /// to `default_silence_timeout_ms()` when omitted. Also accepted under
    /// the key `silenceTimeoutMs`.
    #[serde(default = "default_silence_timeout_ms", alias = "silenceTimeoutMs")]
    pub silence_timeout_ms: u32,
    /// Minimum speech duration (milliseconds, going by the field name);
    /// falls back to `default_min_speech_ms()` when omitted. Also accepted
    /// under the key `minSpeechMs`.
    #[serde(default = "default_min_speech_ms", alias = "minSpeechMs")]
    pub min_speech_ms: u32,
}
/// Fallback for `VadConfig::threshold`, used both by serde when the key is
/// missing and by `VadConfig::default`.
fn default_vad_threshold() -> f32 {
    const THRESHOLD: f32 = 0.5;
    THRESHOLD
}
/// Fallback for `VadConfig::silence_timeout_ms`, used both by serde when the
/// key is missing and by `VadConfig::default`.
fn default_silence_timeout_ms() -> u32 {
    const SILENCE_TIMEOUT_MS: u32 = 1_500;
    SILENCE_TIMEOUT_MS
}
/// Fallback for `VadConfig::min_speech_ms`, used both by serde when the key
/// is missing and by `VadConfig::default`.
fn default_min_speech_ms() -> u32 {
    const MIN_SPEECH_MS: u32 = 250;
    MIN_SPEECH_MS
}
/// Builds a `VadConfig` from the same helper functions serde uses for
/// missing keys.
impl Default for VadConfig {
    fn default() -> Self {
        let threshold = default_vad_threshold();
        let silence_timeout_ms = default_silence_timeout_ms();
        let min_speech_ms = default_min_speech_ms();
        Self {
            threshold,
            silence_timeout_ms,
            min_speech_ms,
        }
    }
}
/// Wake settings.
///
/// Each field has a serde default, so a partially specified (or empty)
/// input still deserializes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WakeConfig {
    /// Enable flag; `false` when omitted.
    #[serde(default)]
    pub enabled: bool,
    /// Wake phrase; falls back to `default_wake_phrase()` when omitted.
    #[serde(default = "default_wake_phrase")]
    pub phrase: String,
    /// Sensitivity value; falls back to `default_wake_sensitivity()` when
    /// omitted.
    #[serde(default = "default_wake_sensitivity")]
    pub sensitivity: f32,
    /// Optional model path; `None` when omitted. Also accepted under the
    /// key `modelPath`.
    #[serde(default, alias = "modelPath")]
    pub model_path: Option<String>,
}
/// Fallback for `WakeConfig::phrase`, used both by serde when the key is
/// missing and by `WakeConfig::default`.
fn default_wake_phrase() -> String {
    String::from("hey weft")
}
/// Fallback for `WakeConfig::sensitivity`, used both by serde when the key
/// is missing and by `WakeConfig::default`.
fn default_wake_sensitivity() -> f32 {
    const SENSITIVITY: f32 = 0.5;
    SENSITIVITY
}
/// Builds a `WakeConfig` that is disabled, uses the fallback phrase and
/// sensitivity, and has no model path.
impl Default for WakeConfig {
    fn default() -> Self {
        let phrase = default_wake_phrase();
        let sensitivity = default_wake_sensitivity();
        Self {
            enabled: false,
            phrase,
            sensitivity,
            model_path: None,
        }
    }
}
/// Cloud fallback settings.
///
/// `Default` is derived and every field is `#[serde(default)]`, so an
/// omitted section yields a disabled config with empty provider strings.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CloudFallbackConfig {
    /// Enable flag; `false` when omitted.
    #[serde(default)]
    pub enabled: bool,
    /// STT provider string; empty when omitted. Also accepted under the key
    /// `sttProvider`.
    #[serde(default, alias = "sttProvider")]
    pub stt_provider: String,
    /// TTS provider string; empty when omitted. Also accepted under the key
    /// `ttsProvider`.
    #[serde(default, alias = "ttsProvider")]
    pub tts_provider: String,
}