Skip to main content

ferrum_models/models/
mod.rs

1//! Model-as-Code implementations.
2//!
3//! Each module defines one model family as explicit Rust code: structs for
4//! weights + `forward` methods using the `Backend` trait and `Linear` trait
5//! directly. This replaces the earlier "generic ModelRunner +
6//! TransformerConfig" approach, which could not express MoE / MLA /
7//! multimodal / quantization cleanly.
8//!
9//! Current coverage:
10//!   - `llama_family` — Llama / Llama-2 / Llama-3 / Qwen2 / Qwen2.5 / Qwen3 (standard GQA + SwiGLU + RoPE, optional QK-norm).
11//!   - `qwen3_moe`    — Qwen3 MoE (`Qwen3MoeModel`; see also `qwen3_moe_profile`, `qwen3_moe_runtime`).
12//!
13//! Planned (Phase D):
14//!   - `mistral`     — sliding-window attention variant.
15//!   - `deepseek_v3` — MLA compressed KV + MoE expert routing.
16//!   - `qwen_vl`     — ViT backbone + LLM (multimodal).
17
18pub mod llama_family;
19pub mod llama_family_pipeline;
20pub mod qwen3_moe;
21pub mod qwen3_moe_profile;
22pub mod qwen3_moe_runtime;
23
24pub use llama_family::{LlamaFamilyConfig, LlamaFamilyModel};
25pub use llama_family_pipeline::{
26    LlamaFamilyPipelineModel, LlamaPipelineMode, LlamaPipelinePlacement, LlamaPipelineStageBridge,
27    LlamaPipelineStagePlacement, LlamaPipelineTransport,
28};
29pub use qwen3_moe::Qwen3MoeModel;
30pub mod llama_family_forward_batched;
31pub mod qwen3_moe_forward_unified;
32pub mod qwen3_moe_forward_unified_layer;