use reqwest::Client;
use reqwest::multipart;
use std::fs::File;
use std::io::Read;
use std::path::Path;
#[derive(Debug, thiserror::Error)]
pub enum TtsError {
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
#[error("Failed to synthesize speech")]
TtsSynthesisError(String),
#[error("API request error: {0}")]
ApiRequestError(#[from] reqwest::Error),
#[error("API key not found")]
ApiKeyNotFound(#[from] std::env::VarError),
}
#[derive(Debug, thiserror::Error)]
pub enum SttError {
#[error("Failed to get API key")]
EnvVar(#[from] std::env::VarError),
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
#[error("Failed to create multipart request")]
Multipart(#[from] reqwest::Error),
#[error("Failed to set mime type")]
Mime(#[from] reqwest::header::InvalidHeaderValue),
}
pub async fn text_to_speech(text: &str, file_path: &str) -> Result<(), TtsError> {
tracing::debug!("tts text: {}, output file: {}", text, file_path);
let api_key = std::env::var("OPENAI_API_KEY").map_err(TtsError::ApiKeyNotFound)?;
#[derive(serde::Serialize)]
struct TtsRequest {
model: String,
input: String,
voice: String,
response_format: String,
speed: f32,
}
let request_body = TtsRequest {
model: "tts-1".to_string(), input: text.to_string(),
voice: "alloy".to_string(), response_format: "mp3".to_string(),
speed: 1.0,
};
let client = reqwest::Client::new();
let response = client
.post("https://api.openai.com/v1/audio/speech")
.bearer_auth(api_key)
.json(&request_body)
.send()
.await
.map_err(TtsError::ApiRequestError)?;
if !response.status().is_success() {
let error_message = response
.text()
.await
.unwrap_or_else(|_| "Unknown error".to_string());
return Err(TtsError::TtsSynthesisError(format!(
"OpenAI API error: {}",
error_message
)));
}
let audio_bytes = response.bytes().await.map_err(TtsError::ApiRequestError)?;
if let Some(parent) = Path::new(file_path).parent() {
std::fs::create_dir_all(parent).map_err(TtsError::IoError)?;
}
std::fs::write(file_path, audio_bytes).map_err(TtsError::IoError)?;
Ok(())
}
pub async fn speech_to_text(file_path: &str) -> Result<String, SttError> {
let mut file = File::open(file_path)?;
let mut file_bytes = Vec::new();
file.read_to_end(&mut file_bytes)?;
let file_part = multipart::Part::bytes(file_bytes)
.file_name("audio.mp3")
.mime_str("audio/mpeg")?;
let form = multipart::Form::new()
.text("model", "whisper-1") .part("file", file_part);
let api_key = std::env::var("OPENAI_API_KEY").map_err(SttError::EnvVar)?;
let client = Client::new();
let response = client
.post("https://api.openai.com/v1/audio/transcriptions")
.bearer_auth(api_key)
.multipart(form)
.send()
.await?;
#[derive(serde::Deserialize)]
struct WhisperResponse {
text: String,
}
let status = response.status();
let text = response.json::<WhisperResponse>().await?.text;
tracing::debug!("Status: {}\nResponse: {}", status, text);
Ok(text)
}