use anyhow::Result;
use mistralrs::{AudioInput, MultimodalMessages, MultimodalModelBuilder, TextMessageRole};
#[tokio::main]
async fn main() -> Result<()> {
let model = MultimodalModelBuilder::new("../hf_models/gemma3n_e4b")
.with_logging()
.build()
.await?;
let audio_bytes = std::fs::read("sample_speech.wav")?;
let audio = AudioInput::from_bytes(&audio_bytes)?;
let messages = MultimodalMessages::new().add_audio_message(
TextMessageRole::User,
"What is being said?",
vec![audio],
);
let response = model.send_chat_request(messages).await?;
println!("{}", response.choices[0].message.content.as_ref().unwrap());
Ok(())
}