1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// RLX — versatile ML compiler + runtime.
// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 3.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! LLaDA2 MoE diffusion language model (TIDE reference: `/Users/Shared/TIDE`).
//!
//! - [`LLaDA2MoeConfig`] — HF / TIDE `config.json`
//! - [`build_llada2_forward_graph`] — bidirectional attention + MoE FFN
//! - [`LLaDA2Runner`] — multi-backend forward + [`generate`] + TIDE offload
//!   (standard backends: CPU, Metal, MLX, CUDA, ROCm, WGPU, Vulkan)
//!
//! ## PyTorch parity checklist
//!
//! | Component | Status |
//! |-----------|--------|
//! | RMSNorm, fused QKV, QK-norm, partial RoPE | Graph |
//! | Bidirectional attention + `head_dim^-0.5` scale | [`Op::Attention`] B,H,S,D |
//! | Group-limited sigmoid gate + expert bias routing | [`gate_op`] |
//! | SwiGLU dense + MoE + shared expert | Graph |
//! | Block-diffusion mask + `generate` loop | [`generate`] |
//! | Temperature / top-k / top-p sampling | [`sampling`] |
//! | TIDE expert offload | [`moe_store`] + runtime pools |
//!
//! Validate against PyTorch:
//! - Component parity: `tests/llada2_numerical_parity.rs`
//! - Full e2e (weights): `LLADA2_MODEL_DIR=… cargo test --test llada2_e2e_parity`
//! - CLI: `cargo run -p rlx-models --example llada2_run -- --model-dir … --device metal`
pub use ;
pub use build_llada2_forward_graph;
pub use ;
pub use ;
pub use LLaDA2MoeConfig;
pub use ;
pub use ;
pub use block_diffusion_attention_mask;
pub use ;
pub use ;
pub use sample_logits;
pub use LLaDA2Weights;