//! # axonml-llm 0.6.2
//!
//! Large Language Model architectures for the AxonML framework.
//!
//! ## Documentation
//! Nine LLM architectures for the AxonML framework.
//!
//! Complete pure-Rust implementations: GPT-2 (decoder-only), LLaMA (split-
//! halves RoPE + GQA + SwiGLU), Mistral (sliding-window attention), Phi
//! (partial RoPE + GELU), BERT (bidirectional encoder + classification/MLM),
//! SSM/Mamba (selective S6 scan + depthwise conv + SSMForCausalLM), Hydra
//! (hybrid SSM + windowed attention), Chimera (sparse MoE + differential
//! attention), Trident (1.58-bit ternary TernaryLinear, RoPE + GQA +
//! ReLU²-gated FFN + SubLN, graph-preserving RepeatKVBackward, configs for
//! 1B/3B/smoke). Shared building blocks: attention, RMSNorm, RotaryEmbedding,
//! embedding, text generation (top-k/top-p/temperature), HuggingFace weight
//! loader, and pretrained model hub.
//!
//! # File
//! `crates/axonml-llm/src/lib.rs`
//!
//! # Author
//! Andrew Jewell Sr. — AutomataNexus LLC
//! ORCID: 0009-0005-2158-7060
//!
//! # Updated
//! April 14, 2026 11:15 PM EST
//!
//! # Disclaimer
//! Use at own risk. This software is provided "as is", without warranty of any
//! kind, express or implied. The author and AutomataNexus shall not be held
//! liable for any damages arising from the use of this software.

#![warn(missing_docs)]
#![warn(clippy::all)]

// -----------------------------------------------------------------------------
// Public submodules: one per architecture (bert, chimera, gpt2, hydra, llama,
// mistral, phi, ssm, trident) plus shared infrastructure (attention, config,
// embedding, error, generation, hf_loader, hub, state_dict, tokenizer,
// transformer). Kept in alphabetical order.
// -----------------------------------------------------------------------------
pub mod attention;
pub mod bert;
pub mod chimera;
pub mod config;
pub mod embedding;
pub mod error;
pub mod generation;
pub mod gpt2;
pub mod hf_loader;
pub mod hub;
pub mod hydra;
pub mod llama;
pub mod mistral;
pub mod phi;
pub mod ssm;
pub mod state_dict;
pub mod tokenizer;
pub mod transformer;
pub mod trident;

// -----------------------------------------------------------------------------
// Crate-root re-exports so downstream users can write `axonml_llm::LLaMA`
// instead of `axonml_llm::llama::LLaMA`. One `pub use` per submodule, in the
// same alphabetical order as the module declarations above. Note the rename
// `download_weights as download_llm_weights`, which disambiguates the hub
// helper at the crate root.
// -----------------------------------------------------------------------------
pub use attention::{
    CausalSelfAttention, FlashAttention, FlashAttentionConfig, KVCache, LayerKVCache,
    MultiHeadSelfAttention, scaled_dot_product_attention,
};
pub use bert::{Bert, BertForMaskedLM, BertForSequenceClassification};
pub use chimera::{ChimeraConfig, ChimeraModel};
pub use config::{BertConfig, GPT2Config, TransformerConfig};
pub use embedding::{BertEmbedding, GPT2Embedding, PositionalEmbedding, TokenEmbedding};
pub use error::{LLMError, LLMResult};
pub use generation::{GenerationConfig, TextGenerator};
pub use gpt2::{GPT2, GPT2LMHead};
pub use hf_loader::{HFLoader, load_llama_from_hf, load_mistral_from_hf};
pub use hub::{PretrainedLLM, download_weights as download_llm_weights, llm_registry};
pub use hydra::{HydraConfig, HydraModel};
pub use llama::{LLaMA, LLaMAConfig, LLaMAForCausalLM};
pub use mistral::{Mistral, MistralConfig, MistralForCausalLM};
pub use phi::{Phi, PhiConfig, PhiForCausalLM};
pub use ssm::{SSMBlock, SSMConfig, SSMForCausalLM};
pub use state_dict::{LoadResult, LoadStateDict};
pub use tokenizer::{HFTokenizer, SpecialTokens};
pub use transformer::{TransformerBlock, TransformerDecoder, TransformerEncoder};
pub use trident::{TridentConfig, TridentModel};

// =============================================================================
// Tests
// =============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    /// The GPT-2 "small" preset must expose the expected 12-layer,
    /// 12-head, 768-dim layout.
    #[test]
    fn test_gpt2_config() {
        let cfg = GPT2Config::small();
        assert_eq!((cfg.n_layer, cfg.n_head, cfg.n_embd), (12, 12, 768));
    }

    /// The BERT "base" preset must expose the expected 12-layer,
    /// 12-head, 768-dim layout.
    #[test]
    fn test_bert_config() {
        let cfg = BertConfig::base();
        assert_eq!(
            (cfg.num_hidden_layers, cfg.num_attention_heads, cfg.hidden_size),
            (12, 12, 768)
        );
    }
}