#[ cfg( feature = "audio_processing" ) ]
use crate::client::OllamaClient;
#[ cfg( feature = "audio_processing" ) ]
use error_tools::untyped::{ format_err, Result as OllamaResult };
#[ cfg( feature = "audio_processing" ) ]
impl OllamaClient
{
/// Stores `config` as this client's audio-processing configuration and returns the
/// client so the call can be chained builder-style.
///
/// Any configuration that was set before is replaced.
///
/// The client is consumed by this call; discarding the return value drops it, hence
/// `#[ must_use ]`.
#[ inline ]
#[ must_use ]
pub fn with_audio_config( mut self, config : crate::audio::AudioProcessingConfig ) -> Self
{
  self.audio_config = Some( config );
  self
}
/// Returns a reference to the audio-processing configuration, or `None` when no
/// configuration has been set on this client.
#[ inline ]
#[ must_use ]
pub fn audio_config( &self ) -> Option< &crate::audio::AudioProcessingConfig >
{
  self.audio_config.as_ref()
}
#[ inline ]
pub async fn speech_to_text( &mut self, request : crate::audio::SpeechToTextRequest ) -> OllamaResult< crate::audio::SpeechToTextResponse >
{
if !self.is_audio_format_supported( &request.format )
{
return Err( format_err!( "Audio format {:?} is not supported", request.format ) );
}
#[ cfg( feature = "circuit_breaker" ) ]
{
if let Some( ref circuit_breaker ) = &self.circuit_breaker
{
if !circuit_breaker.can_execute()
{
return Err( format_err!( "Circuit breaker is open" ) );
}
}
}
#[ cfg( feature = "rate_limiting" ) ]
{
if let Some( ref rate_limiter ) = &self.rate_limiter
{
if !rate_limiter.should_allow_request()
{
return Err( format_err!( "Rate limit exceeded. Please try again later." ) );
}
}
}
let start_time = std::time::Instant::now();
let request_id = format!( "req-{}", std::time::SystemTime::now().duration_since( std::time::UNIX_EPOCH ).unwrap().as_millis() );
let audio_data_len = request.audio_data.len();
let url = format!( "{}/api/audio/transcribe", self.base_url );
let form = reqwest::multipart::Form::new()
.text( "model", request.model.clone() )
.part( "audio", reqwest::multipart::Part::bytes( request.audio_data )
.file_name( format!( "audio.{}", request.format.file_extension() ) )
.mime_str( request.format.mime_type() )
.map_err( | e | format_err!( "Failed to set MIME type : {}", e ) )?
);
let response = self.client
.post( &url )
.multipart( form )
.timeout( self.timeout )
.send()
.await;
let processing_time_ms = start_time.elapsed().as_millis() as u64;
match response
{
Ok( resp ) =>
{
let status = resp.status();
if status.is_success()
{
let response_text = resp.text().await
.map_err( | e | format_err!( "Failed to read response : {}", e ) )?;
let transcription : serde_json::Value = serde_json::from_str( &response_text )
.map_err( | e | format_err!( "Failed to parse response : {}", e ) )?;
#[ cfg( feature = "circuit_breaker" ) ]
{
if let Some( ref circuit_breaker ) = &self.circuit_breaker
{
circuit_breaker.record_success();
}
}
#[ cfg( feature = "general_diagnostics" ) ]
{
if let Some( ref diagnostics ) = &self.diagnostics_collector
{
diagnostics.track_request_success( &request_id, audio_data_len );
}
}
Ok( crate::audio::SpeechToTextResponse
{
text : transcription[ "text" ].as_str().unwrap_or( "" ).to_string(),
confidence : transcription[ "confidence" ].as_f64(),
language : transcription[ "language" ].as_str().map( | s | s.to_string() ),
duration : Some( ( processing_time_ms as f64 ) / 1000.0 ),
metadata : None,
})
}
else
{
#[ cfg( feature = "circuit_breaker" ) ]
{
if let Some( ref circuit_breaker ) = &self.circuit_breaker
{
circuit_breaker.record_failure();
}
}
let error_text = resp.text().await.unwrap_or_else( | _ | "Unknown error".to_string() );
#[ cfg( feature = "general_diagnostics" ) ]
{
if let Some( ref diagnostics ) = &self.diagnostics_collector
{
diagnostics.track_request_failure( &request_id, status.as_u16(), &error_text );
}
}
Err( format_err!( "Speech-to-text failed : {}", error_text ) )
}
}
Err( e ) =>
{
#[ cfg( feature = "circuit_breaker" ) ]
{
if let Some( ref circuit_breaker ) = &self.circuit_breaker
{
circuit_breaker.record_failure();
}
}
#[ cfg( feature = "general_diagnostics" ) ]
{
if let Some( ref diagnostics ) = &self.diagnostics_collector
{
diagnostics.track_request_failure( &request_id, 500, &e.to_string() );
}
}
Err( format_err!( "Speech-to-text request failed : {}", e ) )
}
}
}
#[ inline ]
pub async fn text_to_speech( &mut self, request : crate::audio::TextToSpeechRequest ) -> OllamaResult< crate::audio::TextToSpeechResponse >
{
#[ cfg( feature = "circuit_breaker" ) ]
{
if let Some( ref circuit_breaker ) = &self.circuit_breaker
{
if !circuit_breaker.can_execute()
{
return Err( format_err!( "Circuit breaker is open" ) );
}
}
}
#[ cfg( feature = "rate_limiting" ) ]
{
if let Some( ref rate_limiter ) = &self.rate_limiter
{
if !rate_limiter.should_allow_request()
{
return Err( format_err!( "Rate limit exceeded. Please try again later." ) );
}
}
}
let start_time = std::time::Instant::now();
let request_id = format!( "req-{}", std::time::SystemTime::now().duration_since( std::time::UNIX_EPOCH ).unwrap().as_millis() );
let url = format!( "{}/api/audio/synthesize", self.base_url );
let request_body = serde_json::json!({
"model": request.model,
"text": request.text,
"voice": request.voice,
"format": request.format.file_extension(),
});
let response = self.client
.post( &url )
.json( &request_body )
.timeout( self.timeout )
.send()
.await;
let processing_time_ms = start_time.elapsed().as_millis() as u64;
match response
{
Ok( resp ) =>
{
let status = resp.status();
if status.is_success()
{
let audio_data = resp.bytes().await
.map_err( | e | format_err!( "Failed to read audio data : {}", e ) )?;
#[ cfg( feature = "circuit_breaker" ) ]
{
if let Some( ref circuit_breaker ) = &self.circuit_breaker
{
circuit_breaker.record_success();
}
}
#[ cfg( feature = "general_diagnostics" ) ]
{
if let Some( ref diagnostics ) = &self.diagnostics_collector
{
diagnostics.track_request_success( &request_id, audio_data.len() );
}
}
Ok( crate::audio::TextToSpeechResponse
{
audio_data : audio_data.to_vec(),
format : request.format,
duration : Some( ( processing_time_ms as f64 ) / 1000.0 ),
sample_rate : Some( 24000 ), metadata : None,
})
}
else
{
#[ cfg( feature = "circuit_breaker" ) ]
{
if let Some( ref circuit_breaker ) = &self.circuit_breaker
{
circuit_breaker.record_failure();
}
}
let error_text = resp.text().await.unwrap_or_else( | _ | "Unknown error".to_string() );
#[ cfg( feature = "general_diagnostics" ) ]
{
if let Some( ref diagnostics ) = &self.diagnostics_collector
{
diagnostics.track_request_failure( &request_id, status.as_u16(), &error_text );
}
}
Err( format_err!( "Text-to-speech failed : {}", error_text ) )
}
}
Err( e ) =>
{
#[ cfg( feature = "circuit_breaker" ) ]
{
if let Some( ref circuit_breaker ) = &self.circuit_breaker
{
circuit_breaker.record_failure();
}
}
#[ cfg( feature = "general_diagnostics" ) ]
{
if let Some( ref diagnostics ) = &self.diagnostics_collector
{
diagnostics.track_request_failure( &request_id, 500, &e.to_string() );
}
}
Err( format_err!( "Text-to-speech request failed : {}", e ) )
}
}
}
#[ inline ]
fn is_audio_format_supported( &self, format : &crate::audio::AudioFormat ) -> bool
{
matches!( format,
crate ::audio::AudioFormat::Mp3 |
crate ::audio::AudioFormat::Wav |
crate ::audio::AudioFormat::Ogg |
crate ::audio::AudioFormat::Flac
)
}
/// Runs one spoken chat turn: transcribe `audio_input`, send the transcript to the chat
/// endpoint as a single user message, then synthesize the reply as speech.
///
/// The same `model` name is used for all three stages. The reply is always requested as
/// MP3 with the voice `"default"`.
///
/// # Errors
///
/// Propagates the first error returned by `speech_to_text`, `chat` or `text_to_speech`.
#[ inline ]
pub async fn voice_chat( &mut self, audio_input : Vec< u8 >, format : crate::audio::AudioFormat, model : String ) -> OllamaResult< crate::audio::TextToSpeechResponse >
{
  // Stage 1 : speech -> text.
  let heard = self
    .speech_to_text( crate::audio::SpeechToTextRequest
    {
      model : model.clone(),
      audio_data : audio_input,
      format,
      language : None,
      options : None,
    })
    .await?;

  // Stage 2 : text -> chat reply. Only the message type depends on `vision_support`;
  // the surrounding request is the same in both configurations.
  #[ cfg( feature = "vision_support" ) ]
  let user_message = crate::ChatMessage
  {
    role : crate::MessageRole::User,
    content : heard.text,
    images : None,
    #[ cfg( feature = "tool_calling" ) ]
    tool_calls : None,
  };
  #[ cfg( not( feature = "vision_support" ) ) ]
  let user_message = crate::Message
  {
    role : "user".to_string(),
    content : heard.text,
  };

  let reply = self
    .chat( crate::ChatRequest
    {
      model : model.clone(),
      messages : vec![ user_message ],
      stream : None,
      options : None,
      #[ cfg( feature = "tool_calling" ) ]
      tools : None,
      #[ cfg( feature = "tool_calling" ) ]
      tool_messages : None,
    })
    .await?;

  // Stage 3 : chat reply -> speech.
  self
    .text_to_speech( crate::audio::TextToSpeechRequest
    {
      model,
      text : reply.message.content,
      voice : Some( "default".to_string() ),
      format : crate::audio::AudioFormat::Mp3,
      speed : None,
      options : None,
    })
    .await
}
}