sarvam-rs 0.2.0

Rust SDK for Sarvam AI APIs — chat, translation, speech-to-text, text-to-speech, transliteration, and language identification
Documentation
use std::path::Path;

use crate::config::SarvamConfig;
use crate::error::{Result, SarvamError};
use crate::types::speech_to_text::*;

/// Entry point for the speech-to-text-translate endpoint.
///
/// Holds the SDK configuration and the HTTP client; both are cloned into each
/// request builder handed out by [`SpeechToTextTranslateApi::translate`].
pub struct SpeechToTextTranslateApi {
    /// SDK configuration cloned into every builder created from this handle.
    config: SarvamConfig,
    /// HTTP client cloned into every request builder.
    client: reqwest::Client,
}

/// Builder for a single speech-to-text-translate request.
///
/// Optional settings are applied through the chained setter methods; nothing
/// is read from disk or sent over the network until `send` is awaited.
pub struct SttTranslateRequestBuilder {
    /// Supplies the base URL, the `api-subscription-key` header value and the
    /// request timeout used by `send`.
    config: SarvamConfig,
    /// HTTP client used to issue the POST request.
    client: reqwest::Client,
    /// Path of the audio file that `send` reads and uploads as the `file` part.
    file_path: String,
    /// When set, sent as the `model` form field.
    model: Option<SpeechToTextTranslateModel>,
    /// When set, sent as the `prompt` form field.
    prompt: Option<String>,
    /// When set, sent as the `input_audio_codec` form field.
    input_audio_codec: Option<InputAudioCodec>,
}

impl SttTranslateRequestBuilder {
    pub fn model(mut self, model: SpeechToTextTranslateModel) -> Self {
        self.model = Some(model);
        self
    }

    pub fn prompt(mut self, prompt: impl Into<String>) -> Self {
        self.prompt = Some(prompt.into());
        self
    }

    pub fn input_audio_codec(mut self, codec: InputAudioCodec) -> Self {
        self.input_audio_codec = Some(codec);
        self
    }

    pub async fn send(self) -> Result<SpeechToTextTranslateResponse> {
        let file_name = Path::new(&self.file_path)
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_else(|| "audio.wav".to_string());

        let file_bytes = tokio::fs::read(&self.file_path).await.map_err(|e| {
            SarvamError::Custom(format!("Failed to read file '{}': {}", self.file_path, e))
        })?;

        let file_part = reqwest::multipart::Part::bytes(file_bytes)
            .file_name(file_name)
            .mime_str("application/octet-stream")
            .map_err(|e| SarvamError::Custom(e.to_string()))?;

        let mut form = reqwest::multipart::Form::new().part("file", file_part);

        if let Some(model) = &self.model {
            let val = serde_json::to_value(model)
                .map_err(|e| SarvamError::Custom(e.to_string()))?
                .as_str()
                .ok_or_else(|| SarvamError::Custom("Invalid model value".into()))?
                .to_string();
            form = form.text("model", val);
        }

        if let Some(prompt) = self.prompt {
            form = form.text("prompt", prompt);
        }

        if let Some(codec) = &self.input_audio_codec {
            let val = serde_json::to_value(codec)
                .map_err(|e| SarvamError::Custom(e.to_string()))?
                .as_str()
                .ok_or_else(|| SarvamError::Custom("Invalid codec value".into()))?
                .to_string();
            form = form.text("input_audio_codec", val);
        }

        let url = format!("{}/speech-to-text-translate", self.config.base_url);

        let response = self
            .client
            .post(&url)
            .header("api-subscription-key", &self.config.api_subscription_key)
            .multipart(form)
            .timeout(self.config.timeout)
            .send()
            .await?;

        let status = response.status();
        if !status.is_success() {
            let body = response.text().await.unwrap_or_default();
            return Err(SarvamError::from_response(status, &body));
        }

        response
            .json::<SpeechToTextTranslateResponse>()
            .await
            .map_err(SarvamError::from)
    }
}

impl SpeechToTextTranslateApi {
    pub(crate) fn new(config: SarvamConfig, client: reqwest::Client) -> Self {
        Self { config, client }
    }

    pub fn translate(&self, file_path: impl Into<String>) -> SttTranslateRequestBuilder {
        SttTranslateRequestBuilder {
            config: self.config.clone(),
            client: self.client.clone(),
            file_path: file_path.into(),
            model: None,
            prompt: None,
            input_audio_codec: None,
        }
    }

    #[cfg(feature = "streaming")]
    pub fn stream(&self) -> crate::streaming::SttTranslateStreamBuilder {
        crate::streaming::SttTranslateStreamBuilder::new(self.config.clone())
    }
}