use crate::core::audio::AudioService;
use crate::core::audio::types::SpeechRequest;
use crate::core::types::model::ProviderCapability;
use crate::server::state::AppState;
use actix_web::{HttpRequest, HttpResponse, Result as ActixResult, web};
use serde::Deserialize;
use tracing::{error, info};
use crate::server::routes::ai::context::get_request_context;
use crate::server::routes::ai::openai_errors;
use crate::server::routes::ai::provider_selection::select_provider_for_model;
/// JSON body accepted by the `audio_speech` handler.
///
/// The handler copies every field into a `SpeechRequest`, except `model`,
/// which is first resolved through provider selection.
#[derive(Debug, Deserialize)]
pub struct AudioSpeechRequest {
/// Text to synthesize. Required; only its byte length is logged.
pub input: String,
/// Requested model name. When the field is absent from the payload, serde
/// fills it with the value returned by `default_tts_model` (`"tts-1"`).
#[serde(default = "default_tts_model")]
pub model: String,
/// Voice identifier. Required; forwarded unchanged to the speech request.
pub voice: String,
/// Optional output format, forwarded unchanged. Accepted values are not
/// checked in this file — NOTE(review): presumably validated downstream.
pub response_format: Option<String>,
/// Optional speed value, forwarded unchanged. No range check is applied in
/// this file — NOTE(review): confirm the valid range with the provider.
pub speed: Option<f32>,
}
/// Serde default for `AudioSpeechRequest::model`: yields `"tts-1"` when the
/// request payload does not name a model.
fn default_tts_model() -> String {
    String::from("tts-1")
}
pub async fn audio_speech(
state: web::Data<AppState>,
req: HttpRequest,
request: web::Json<AudioSpeechRequest>,
) -> ActixResult<HttpResponse> {
info!(
"Audio speech request: model={}, voice={}, text_len={}",
request.model,
request.voice,
request.input.len()
);
let _context = match get_request_context(&req) {
Ok(ctx) => ctx,
Err(_) => {
return Ok(openai_errors::unauthorized_error("Unauthorized"));
}
};
let unified_router = &state.unified_router;
let selected_model = match select_provider_for_model(
unified_router,
&request.model,
ProviderCapability::TextToSpeech,
) {
Ok(selection) => selection,
Err(e) => return Ok(openai_errors::gateway_error_response(&e)),
};
let speech_request = SpeechRequest {
input: request.input.clone(),
model: selected_model,
voice: request.voice.clone(),
response_format: request.response_format.clone(),
speed: request.speed,
};
let audio_service = AudioService::new();
match audio_service.speech(speech_request).await {
Ok(response) => Ok(HttpResponse::Ok()
.content_type(response.content_type)
.body(response.audio)),
Err(e) => {
error!("Speech generation error: {}", e);
Ok(openai_errors::gateway_error_response(&e))
}
}
}