//! bitmamba 0.1.0
//!
//! BitMamba: 1.58-bit Mamba language model with infinite context window —
//! includes an OpenAI-compatible API server.
//! BitMamba CLI - Command line inference

use anyhow::{Error, Result};

fn main() -> Result<()> {
    println!("=== BitMamba Rust Inference ===\n");

    println!("Loading model from HuggingFace...");
    let (model, tokenizer) = bitmamba::load()?;
    println!("Ready!\n");

    let prompt = "def fib(n):";
    let temperature = 0.7;
    let max_tokens = 50;
    
    println!("Prompt: {}", prompt);
    
    let tokens = tokenizer.encode(prompt, true).map_err(Error::msg)?;
    let input_ids = tokens.get_ids();

    print!("{}", prompt);
    use std::io::Write;
    std::io::stdout().flush()?;
    
    let output_ids = model.generate(input_ids, max_tokens, temperature)?;
    let new_tokens = &output_ids[input_ids.len()..];
    
    for &token_id in new_tokens {
        let token = tokenizer.decode(&[token_id], true).map_err(Error::msg)?;
        print!("{}", token);
        std::io::stdout().flush()?;
    }
    
    println!("\n");
    Ok(())
}