/// Entry point for the streaming example: loads a model and prints every value
/// the engine passes to the generation callback as soon as it arrives.
///
/// Command-line arguments (after the program name):
/// 1. path to the model file (required; the usage text suggests a `.gguf` file),
/// 2. prompt (optional; defaults to `"Tell me a story."`).
///
/// When no model path is given, the usage text is printed to stderr and the
/// function returns `Ok(())`, so the example still exits successfully.
///
/// # Errors
///
/// Propagates any error returned by `InferenceEngine::load_model` or
/// `InferenceEngine::generate`.
fn main() -> anyhow::Result<()> {
    // Brings `flush` into scope for the stdout handle used in the callback below.
    use std::io::Write as _;

    let mut args = std::env::args().skip(1);
    let model_path = match args.next() {
        Some(p) => p,
        None => {
            eprintln!("Usage: 03_streaming <model.gguf> [prompt]");
            eprintln!("(No model path provided — exiting cleanly)");
            return Ok(());
        }
    };
    let prompt = args
        .next()
        .unwrap_or_else(|| "Tell me a story.".to_string());

    // `model_path` is not used after this point, so it is moved into the config
    // instead of being cloned.
    let config = oxillama_runtime::EngineConfig {
        model_path,
        ..Default::default()
    };
    let mut engine = oxillama_runtime::InferenceEngine::new(config);
    engine.load_model()?;

    println!("Streaming response for: {prompt}");
    println!("---");
    // NOTE(review): 256 is presumably the maximum number of tokens to generate —
    // confirm against the `generate` signature.
    engine.generate(&prompt, 256, |tok| {
        print!("{tok}");
        // `print!` emits no newline, so flush explicitly to make the output
        // visible right away. A failed flush is deliberately ignored
        // (best-effort display).
        let _ = std::io::stdout().flush();
    })?;
    println!("\n---");
    println!("Done.");
    Ok(())
}