// bitmamba 0.1.0
//
// BitMamba: 1.58-bit Mamba language model with infinite context window
// - includes OpenAI-compatible API server. Documentation follows below.
//! # BitMamba
//!
//! A 1.58-bit Mamba language model with infinite context window.
//!
//! ## Features
//!
//! - **Infinite Context Window** - Mamba's SSM maintains fixed-size state
//! - **1.58-bit Weights** - BitNet-style quantization
//! - **CPU Inference** - No GPU required
//! - **OpenAI-Compatible API** - Works with Cline, Continue, etc.
//!
//! ## Quick Start
//!
//! ```rust,no_run
//! use bitmamba::load;
//!
//! fn main() -> anyhow::Result<()> {
//!     let (model, tokenizer) = load()?;
//!     
//!     let prompt = "def fibonacci(n):";
//!     let tokens = tokenizer.encode(prompt, true).unwrap();
//!     let output = model.generate(tokens.get_ids(), 50, 0.7)?;
//!     
//!     println!("{}", tokenizer.decode(&output, true).unwrap());
//!     Ok(())
//! }
//! ```

pub mod model;

pub use model::{BitMambaStudent, BitLinear, RMSNorm, BitMambaBlock};

use std::path::Path;

use anyhow::{Error, Result};
use candle_core::{DType, Device};
use candle_nn::VarBuilder;
use hf_hub::{api::sync::Api, Repo, RepoType};
use tokenizers::Tokenizer;

/// Default Hugging Face model repository used by [`load`] when no explicit
/// repository id is supplied.
pub const DEFAULT_MODEL_REPO: &str = "rileyseaburg/bitmamba-student";

/// Load the BitMamba model and tokenizer from the default Hugging Face
/// repository ([`DEFAULT_MODEL_REPO`]).
///
/// Convenience wrapper around [`load_from_repo`].
///
/// # Errors
///
/// Propagates any error from [`load_from_repo`] (network/Hub failures,
/// missing files, tokenizer parse errors, or weight-loading errors).
///
/// # Example
///
/// ```rust,no_run
/// # fn main() -> anyhow::Result<()> {
/// let (model, tokenizer) = bitmamba::load()?;
/// # Ok(())
/// # }
/// ```
pub fn load() -> Result<(BitMambaStudent, Tokenizer)> {
    load_from_repo(DEFAULT_MODEL_REPO)
}

/// Load the BitMamba model and tokenizer from a specific Hugging Face repository.
///
/// Downloads (and caches, via `hf-hub`) `model.safetensors` and
/// `tokenizer.json` from `repo_id`, then builds the model on CPU with
/// `f32` weights.
///
/// # Errors
///
/// Returns an error if the Hub API cannot be initialized or reached, if
/// either file is missing from the repository, if the tokenizer file fails
/// to parse, or if the weights do not match the expected architecture.
///
/// # Example
///
/// ```rust,no_run
/// # fn main() -> anyhow::Result<()> {
/// let (model, tokenizer) = bitmamba::load_from_repo("rileyseaburg/bitmamba-student")?;
/// # Ok(())
/// # }
/// ```
pub fn load_from_repo(repo_id: &str) -> Result<(BitMambaStudent, Tokenizer)> {
    // CPU-only inference by design; no GPU device selection.
    let device = Device::Cpu;

    let api = Api::new()?;
    let repo = api.repo(Repo::new(repo_id.to_string(), RepoType::Model));

    let model_path = repo.get("model.safetensors")?;
    let tokenizer_path = repo.get("tokenizer.json")?;

    // tokenizers returns its own error type; convert into anyhow::Error.
    let tokenizer = Tokenizer::from_file(tokenizer_path).map_err(Error::msg)?;
    // SAFETY: the safetensors file was just downloaded into the hf-hub cache
    // and is assumed not to be mutated while memory-mapped — TODO confirm no
    // concurrent cache eviction/rewrite can occur.
    let vb = unsafe { VarBuilder::from_mmaped_safetensors(&[model_path], DType::F32, &device)? };
    let model = BitMambaStudent::load(vb, device)?;

    Ok((model, tokenizer))
}

/// Load the BitMamba model from a local safetensors file
///
/// # Example
///
/// ```rust,no_run
/// let model = bitmamba::load_model_from_file("model.safetensors")?;
/// ```
pub fn load_model_from_file(path: &str) -> Result<BitMambaStudent> {
    let device = Device::Cpu;
    let path_buf = std::path::PathBuf::from(path);
    let vb = unsafe { VarBuilder::from_mmaped_safetensors(&[path_buf], DType::F32, &device)? };
    BitMambaStudent::load(vb, device)
}

/// Load tokenizer from a local file
///
/// # Example
///
/// ```rust,no_run
/// let tokenizer = bitmamba::load_tokenizer_from_file("tokenizer.json")?;
/// ```
pub fn load_tokenizer_from_file(path: &str) -> Result<Tokenizer> {
    Tokenizer::from_file(path).map_err(Error::msg)
}