use std::collections::HashMap;
/// Parameters describing a text-to-speech (TTS) request.
///
/// Created with [`TtsRequest::new`] and refined through the `with_*` builder
/// methods; all fields are public and may also be set directly.
#[derive(Debug, Clone)]
pub struct TtsRequest {
/// Input text; the only value `new` requires.
pub text: String,
/// Voice selection; `None` until set (e.g. through `with_voice`).
/// NOTE(review): presumably a provider voice id or name — confirm with callers.
pub voice: Option<String>,
/// Requested output format; `None` until set (e.g. through `with_format`).
/// NOTE(review): accepted values are not defined in this file.
pub format: Option<String>,
/// Speed setting; `None` until set (e.g. through `with_speed`).
/// NOTE(review): presumably a playback-rate multiplier — valid range not shown here.
pub speed: Option<f32>,
/// Model selection; `None` after `new`. No builder method for it is visible here.
pub model: Option<String>,
/// Additional parameters as arbitrary JSON values keyed by name; empty after `new`.
pub extra_params: HashMap<String, serde_json::Value>,
}
impl TtsRequest {
    /// Creates a request for `text` with every optional field unset and an
    /// empty `extra_params` map.
    pub fn new(text: String) -> Self {
        let extra_params = HashMap::new();
        Self {
            text,
            extra_params,
            voice: None,
            format: None,
            speed: None,
            model: None,
        }
    }

    /// Returns the request with `voice` set, replacing any previous value.
    pub fn with_voice(self, voice: String) -> Self {
        Self {
            voice: Some(voice),
            ..self
        }
    }

    /// Returns the request with `format` set, replacing any previous value.
    pub fn with_format(self, format: String) -> Self {
        Self {
            format: Some(format),
            ..self
        }
    }

    /// Returns the request with `speed` set, replacing any previous value.
    pub const fn with_speed(self, speed: f32) -> Self {
        // Plain field assignment (rather than struct-update syntax) keeps the
        // whole value moved into the return slot.
        let mut request = self;
        request.speed = Some(speed);
        request
    }
}
/// Result of a text-to-speech (TTS) request.
#[derive(Debug, Clone)]
pub struct TtsResponse {
/// Raw audio bytes.
pub audio_data: Vec<u8>,
/// Format of `audio_data`.
/// NOTE(review): the set of possible values is not defined in this file.
pub format: String,
/// Audio duration, when known.
/// NOTE(review): presumably seconds — confirm with the producer of this value.
pub duration: Option<f32>,
/// Audio sample rate, when known.
/// NOTE(review): presumably Hz — confirm.
pub sample_rate: Option<u32>,
/// Additional response data as arbitrary JSON values keyed by name.
pub metadata: HashMap<String, serde_json::Value>,
}
/// Parameters describing a speech-to-text (STT) request.
///
/// The audio source is given either as in-memory bytes (`audio_data`) or as a
/// path (`file_path`); `from_audio` and `from_file` each set exactly one of
/// them. The type itself does not prevent both or neither from being set.
#[derive(Debug, Clone)]
pub struct SttRequest {
/// In-memory audio bytes; set by `from_audio`, `None` after `from_file`.
pub audio_data: Option<Vec<u8>>,
/// Path to an audio file; set by `from_file`, `None` after `from_audio`.
pub file_path: Option<String>,
/// Audio format; `None` after either constructor.
/// NOTE(review): accepted values are not defined in this file.
pub format: Option<String>,
/// Language selection; `None` after either constructor.
/// NOTE(review): presumably a language code — confirm the expected form.
pub language: Option<String>,
/// Model selection; `None` after either constructor.
pub model: Option<String>,
/// Requested timestamp granularities; `None` after either constructor.
/// NOTE(review): accepted strings (e.g. word/segment level) are not shown here.
pub timestamp_granularities: Option<Vec<String>>,
/// Additional parameters as arbitrary JSON values keyed by name; empty after
/// either constructor.
pub extra_params: HashMap<String, serde_json::Value>,
}
impl SttRequest {
    /// Shared constructor: stores the given source fields, leaves every other
    /// option unset and starts with an empty `extra_params` map.
    fn from_source(audio_data: Option<Vec<u8>>, file_path: Option<String>) -> Self {
        Self {
            audio_data,
            file_path,
            format: None,
            language: None,
            model: None,
            timestamp_granularities: None,
            extra_params: HashMap::new(),
        }
    }

    /// Creates a request whose source is the in-memory `audio_data`;
    /// `file_path` is left as `None`.
    pub fn from_audio(audio_data: Vec<u8>) -> Self {
        Self::from_source(Some(audio_data), None)
    }

    /// Creates a request whose source is `file_path`; `audio_data` is left as
    /// `None`. The path is only stored, not opened or checked.
    pub fn from_file(file_path: String) -> Self {
        Self::from_source(None, Some(file_path))
    }
}
/// Result of a speech-to-text (STT) request.
#[derive(Debug, Clone)]
pub struct SttResponse {
/// Transcribed text.
pub text: String,
/// Language associated with the result, when available.
/// NOTE(review): whether this is detected or echoed from the request is not shown here.
pub language: Option<String>,
/// Confidence value for the result, when available.
/// NOTE(review): scale/range is not defined in this file.
pub confidence: Option<f32>,
/// Per-word timing entries, when available.
pub words: Option<Vec<WordTimestamp>>,
/// Audio duration, when known.
/// NOTE(review): presumably seconds — confirm.
pub duration: Option<f32>,
/// Additional response data as arbitrary JSON values keyed by name.
pub metadata: HashMap<String, serde_json::Value>,
}
/// Timing information for a single word, as used in `SttResponse::words`.
#[derive(Debug, Clone)]
pub struct WordTimestamp {
/// The word text.
pub word: String,
/// Start position of the word.
/// NOTE(review): presumably seconds from the start of the audio — confirm.
pub start: f32,
/// End position of the word; same unit as `start`.
pub end: f32,
/// Confidence value for this word, when available.
/// NOTE(review): scale/range is not defined in this file.
pub confidence: Option<f32>,
}
/// Parameters describing an audio translation request.
///
/// The audio source is given either as in-memory bytes (`audio_data`) or as a
/// path (`file_path`); `from_audio` and `from_file` each set exactly one of
/// them. The type itself does not prevent both or neither from being set.
#[derive(Debug, Clone)]
pub struct AudioTranslationRequest {
/// In-memory audio bytes; set by `from_audio`, `None` after `from_file`.
pub audio_data: Option<Vec<u8>>,
/// Path to an audio file; set by `from_file`, `None` after `from_audio`.
pub file_path: Option<String>,
/// Audio format; `None` after either constructor.
/// NOTE(review): accepted values are not defined in this file.
pub format: Option<String>,
/// Model selection; `None` after either constructor.
pub model: Option<String>,
/// Additional parameters as arbitrary JSON values keyed by name; empty after
/// either constructor.
pub extra_params: HashMap<String, serde_json::Value>,
}
impl AudioTranslationRequest {
    /// Shared constructor: stores the given source fields, leaves `format`
    /// and `model` unset and starts with an empty `extra_params` map.
    fn from_source(audio_data: Option<Vec<u8>>, file_path: Option<String>) -> Self {
        Self {
            audio_data,
            file_path,
            format: None,
            model: None,
            extra_params: HashMap::new(),
        }
    }

    /// Creates a request whose source is the in-memory `audio_data`;
    /// `file_path` is left as `None`.
    pub fn from_audio(audio_data: Vec<u8>) -> Self {
        Self::from_source(Some(audio_data), None)
    }

    /// Creates a request whose source is `file_path`; `audio_data` is left as
    /// `None`. The path is only stored, not opened or checked.
    pub fn from_file(file_path: String) -> Self {
        Self::from_source(None, Some(file_path))
    }
}
/// Descriptive information about a voice.
#[derive(Debug, Clone)]
pub struct VoiceInfo {
/// Identifier of the voice.
/// NOTE(review): presumably the value accepted by `TtsRequest::voice` — confirm.
pub id: String,
/// Name of the voice.
pub name: String,
/// Free-form description, when available.
pub description: Option<String>,
/// Language of the voice, when available.
/// NOTE(review): code vs. display name is not defined in this file.
pub language: Option<String>,
/// Gender label, when available; the set of values is not defined here.
pub gender: Option<String>,
/// Category label, when available; the set of values is not defined here.
pub category: Option<String>,
}
/// Descriptive information about a language and the operations available for it.
#[derive(Debug, Clone)]
pub struct LanguageInfo {
/// Language code.
/// NOTE(review): the code standard (e.g. ISO 639) is not stated in this file.
pub code: String,
/// Name of the language.
pub name: String,
/// Whether transcription is supported for this language.
pub supports_transcription: bool,
/// Whether translation is supported for this language.
pub supports_translation: bool,
}
/// Audio-related features, identified by name.
///
/// The enum is fieldless, so it derives `Copy` in addition to
/// `Clone`/`PartialEq`/`Eq`/`Hash`: values can be passed by value, compared,
/// and stored in hash-based collections without explicit `.clone()` calls.
///
/// NOTE(review): presumably used to advertise or query what an audio provider
/// supports — the consuming code is not in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AudioFeature {
    /// Text-to-speech.
    TextToSpeech,
    /// Streaming text-to-speech.
    StreamingTTS,
    /// Speech-to-text.
    SpeechToText,
    /// Audio translation.
    AudioTranslation,
    /// Realtime processing.
    RealtimeProcessing,
    /// Speaker diarization.
    SpeakerDiarization,
    /// Character-level timing.
    CharacterTiming,
    /// Audio event detection.
    AudioEventDetection,
    /// Voice cloning.
    VoiceCloning,
    /// Audio enhancement.
    AudioEnhancement,
    /// Multimodal audio.
    MultimodalAudio,
}