ferrum_models/models/mod.rs
1//! Model-as-Code implementations.
2//!
3//! Each module defines one model family as explicit Rust code: structs for
4//! weights + `forward` methods using the `Backend` trait and `Linear` trait
5//! directly. This replaces the earlier "generic ModelRunner +
6//! TransformerConfig" approach, which could not express MoE / MLA /
7//! multimodal / quantization cleanly.
8//!
//! Current coverage:
//! - `llama_family` — Llama / Llama-2 / Llama-3 / Qwen2 / Qwen2.5 / Qwen3
//!   (standard GQA + SwiGLU + RoPE, optional QK-norm).
//! - `qwen3_moe` — provides `Qwen3MoeModel`, re-exported from this module.
12//!
13//! Planned (Phase D):
14//! - `mistral` — sliding-window attention variant.
15//! - `deepseek_v3` — MLA compressed KV + MoE expert routing.
16//! - `qwen_vl` — ViT backbone + LLM (multimodal).
17
18pub mod llama_family;
19pub mod llama_family_pipeline;
20pub mod qwen3_moe;
21pub mod qwen3_moe_profile;
22pub mod qwen3_moe_runtime;
23
24pub use llama_family::{LlamaFamilyConfig, LlamaFamilyModel};
25pub use llama_family_pipeline::{
26 LlamaFamilyPipelineModel, LlamaPipelineMode, LlamaPipelinePlacement, LlamaPipelineStageBridge,
27 LlamaPipelineStagePlacement, LlamaPipelineTransport,
28};
29pub use qwen3_moe::Qwen3MoeModel;
30pub mod llama_family_forward_batched;
31pub mod qwen3_moe_forward_unified;
32pub mod qwen3_moe_forward_unified_layer;