Skip to main content

gemma3n_audio/
main.rs

use anyhow::{Context, Result};
use mistralrs::{AudioInput, TextMessageRole, VisionMessages, VisionModelBuilder};

4#[tokio::main]
5async fn main() -> Result<()> {
6    let model = VisionModelBuilder::new("../hf_models/gemma3n_e4b")
7        .with_logging()
8        .build()
9        .await?;
10
11    let audio_bytes = std::fs::read("sample_speech.wav")?;
12    let audio = AudioInput::from_bytes(&audio_bytes)?;
13
14    let messages = VisionMessages::new().add_multimodal_message(
15        TextMessageRole::User,
16        "What is being said?",
17        vec![],
18        vec![audio],
19        &model,
20    )?;
21
22    let response = model.send_chat_request(messages).await?;
23
24    println!("{}", response.choices[0].message.content.as_ref().unwrap());
25    Ok(())
26}