ferrum_models/models/
mod.rs

//! Model-as-Code implementations.
//!
//! Each module defines one model family as explicit Rust code: structs for
//! the weights plus `forward` methods written directly against the `Backend`
//! and `Linear` traits. This replaces the earlier "generic ModelRunner +
//! TransformerConfig" approach, which could not express MoE / MLA /
//! multimodal / quantization cleanly.
//!
//! Current coverage:
//!   - `llama_family` — Llama / Llama-2 / Llama-3 / Qwen2 / Qwen2.5 / Qwen3
//!                      (standard GQA + SwiGLU + RoPE, optional QK-norm).
//!   - `qwen3_moe`    — Qwen3-MoE (`Qwen3MoeModel`), together with the
//!                      `qwen3_moe_*` support modules declared in this file.
//!
//! Planned (Phase D):
//!   - `mistral`     — sliding-window attention variant.
//!   - `deepseek_v3` — MLA compressed KV + MoE expert routing.
//!   - `qwen_vl`     — ViT backbone + LLM (multimodal).

18pub mod llama_family;
19pub mod qwen3_moe;
20pub mod qwen3_moe_profile;
21pub mod qwen3_moe_runtime;
22
23pub use llama_family::{LlamaFamilyConfig, LlamaFamilyModel};
24pub use qwen3_moe::Qwen3MoeModel;
25pub mod llama_family_forward_batched;
26pub mod qwen3_moe_forward_unified;
27pub mod qwen3_moe_forward_unified_layer;
ferrum_models/models/mod.rs

ferrum_models/models/
mod.rs