Skip to main content

hermes_llm/
lib.rs

1//! # Hermes LLM
2//!
3//! A Rust library for training and running Large Language Models from scratch.
4//!
5//! ## Features
6//!
7//! - **Model Architecture Language (MAL)**: Define any transformer architecture using a composable DSL
8//! - **Training**: Distributed training with NCCL, gradient accumulation, and checkpointing
9//! - **Generation**: Text generation with temperature and top-k sampling
10//! - **Tokenization**: BPE tokenizer training and inference
11//! - **DPO**: Direct Preference Optimization for preference-based alignment (an RL-free alternative to RLHF)
12//!
13//! ## Quick Start
14//!
15//! ```ignore
16//! use hermes_llm::{Transformer, Trainer, get_builtin_model};
17//!
18//! // Load a predefined model architecture
19//! let model_def = get_builtin_model("tiny").unwrap();
20//!
21//! // Or parse a model definition from a MAL file
22//! let model_def = hermes_llm::parse_mal_file("model.mal").unwrap();
23//! ```
24//!
25//! ## Model Architecture Language (MAL)
26//!
27//! MAL allows defining transformer architectures in a readable, composable format:
28//!
29//! ```text
30//! attention my_attn {
31//!     num_heads: 32
32//!     num_kv_heads: 8
33//! }
34//!
35//! ffn my_ffn {
36//!     hidden_dim: 4096
37//!     activation: swiglu
38//! }
39//!
40//! block my_block {
41//!     attention: my_attn
42//!     ffn: my_ffn
43//!     norm: rmsnorm { eps: 1e-5 }
44//!     norm_position: pre
45//! }
46//!
47//! model my_model {
48//!     vocab_size: 32000
49//!     hidden_size: 1024
50//!     num_layers: 32
51//!     block: my_block
52//! }
53//! ```
54
55pub mod config;
56pub mod data;
57pub mod distributed;
58pub mod dpo;
59pub mod generate;
60pub mod io;
61pub mod mal;
62pub mod model;
63pub mod tokenizer;
64pub mod training;
65
66// Core types
67pub use config::TrainingConfig;
68pub use model::Transformer;
69
70// Training
71pub use training::{Trainer, TrainingState, create_progress_bar};
72
73// Generation (the `generate` module also exports `get_lr_with_warmup`)
74pub use generate::{TextGenerator, get_lr_with_warmup};
75
76// Distributed
77pub use distributed::{DistributedConfig, NcclCommunicator};
78
79// Model Architecture Language (MAL)
80pub use mal::{
81    Activation, AttentionDef, BlockDef, FfnDef, MalFile, ModelDef, NormPosition, NormType,
82    PositionEncoding, get_builtin_model, get_wellknown_mal, list_wellknown_models, parse_mal,
83    parse_mal_file, parse_mal_full,
84};
85
86// Data loading
87pub use data::{DataLoader, Dataset};
88
89// Tokenization
90pub use tokenizer::{BPETrainer, Tokenizer};