use anyhow::{Error, Result};
use std::io::Write;

fn main() -> Result<()> {
println!("=== BitMamba Rust Inference ===\n");
println!("Loading model from HuggingFace...");
let (model, tokenizer) = bitmamba::load()?;
println!("Ready!\n");
let prompt = "def fib(n):";
let temperature = 0.7;
let max_tokens = 50;
println!("Prompt: {}", prompt);
let tokens = tokenizer.encode(prompt, true).map_err(Error::msg)?;
let input_ids = tokens.get_ids();
print!("{}", prompt);
use std::io::Write;
std::io::stdout().flush()?;
    // Generate up to `max_tokens` new tokens; the returned sequence
    // includes the prompt tokens, so slice them off before decoding.
    let output_ids = model.generate(input_ids, max_tokens, temperature)?;
    let new_tokens = &output_ids[input_ids.len()..];

    // Decode and print the generated tokens one at a time, flushing
    // stdout after each so they appear immediately in the terminal.
    for &token_id in new_tokens {
        let token = tokenizer.decode(&[token_id], true).map_err(Error::msg)?;
        print!("{}", token);
        std::io::stdout().flush()?;
    }
println!("\n");
Ok(())
}