use catgrad_llm::run::*;
use catgrad_llm::serve::*;
use std::io::Write;
/// Load the Qwen3-0.6B chat model, ask it a fixed question, and stream the
/// generated tokens to stdout as they are produced.
///
/// Errors from model/tokenizer loading, prompt encoding, and token decoding
/// are propagated to the caller via `?` rather than panicking.
fn main() -> Result<()> {
    // Use `?` instead of `.unwrap()` so a failed model download/load surfaces
    // as an error return, consistent with every other fallible call below.
    let loader = ModelLoader::new("Qwen/Qwen3-0.6B", true)?;
    let mut runner = loader.load_runner()?;
    let tokenizer = loader.load_tokenizer()?;

    // Build a minimal two-message chat: a system prompt plus one user turn.
    let system_message = Message {
        role: "system".to_string(),
        content: "You are a helpful chat assistant".to_string(),
    };
    let prompt_message = Message {
        role: "user".to_string(),
        content: "What is 2+2?".to_string(),
    };
    let messages = vec![system_message, prompt_message];

    // Encode the chat into the token context the runner consumes.
    let context = tokenizer.encode_messages(messages)?;

    // Stream tokens: decode and print each one immediately, flushing stdout
    // so output appears incrementally rather than after generation finishes.
    // The flush result is intentionally ignored (best-effort display).
    for token in runner.complete(context) {
        print!("{}", tokenizer.decode(vec![token])?);
        let _ = std::io::stdout().flush();
    }
    Ok(())
}