rlx_models/
lib.rs

1// RLX — versatile ML compiler + runtime.
2// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, version 3.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16//! RLX model loading — parse configs, load weights, build IR graphs.
17//!
18//! This crate is a thin facade over per-model workspace members (`rlx-qwen3`,
19//! `rlx-sam`, …). Depend on a specific model crate directly when you only need
20//! one family.
21
22pub use rlx_core::{
23    BertConfig, EmbedGgufKind, FlowBuildExt, GgufDirGuide, GgufModelFamily, GgufTensorNameResolver,
24    LlamaFamilyGgufResolver, LoadOpts, LoadWeightsOptions, LoadedWeights, NomicBertConfig,
25    NomicVisionConfig, PassThroughGgufResolver, Qwen35NativeGgufResolver, RegisteredFormat,
26    ResolveOpts, ResolveWeightsOptions, STANDARD_DEVICE_NAMES, WeightDrainPolicy,
27    WeightFormatRegistration, WeightLoader, WeightMap, WeightMapSource, arch_registry,
28    assert_gguf_family, config, dataprocessing, flow_bridge, flow_util, format_for_extension,
29    gguf_architecture_str, gguf_dir_guide, gguf_f32_bytes_estimate, gguf_family_for_arch,
30    gguf_resolve, gguf_runner_hint, gguf_support, into_compile_parts, is_standard_device,
31    list_registered_formats, lm, load_from_path, load_weight_map_resolved, load_weights_resolved,
32    open as open_weights, open_map, open_map_with, open_with, register_gguf_tensor_resolver,
33    register_weight_format, resolve_weights_file, resolve_weights_file_with_options,
34    validate_sam_device, validate_standard_device, vision_ops_ir, weight_loader, weight_map,
35    weight_registry, weights,
36};
37pub use rlx_flow::{BuiltModel, CompileProfile};
38
/// Glob re-export of `rlx_bert::bert`.
///
/// The crate root additionally surfaces `build_bert_graph` and `build_bert_graph_sized` from here.
39pub mod bert {
40    pub use rlx_bert::bert::*;
41}
/// Glob re-export of `rlx_bert::flow`.
///
/// The crate root additionally surfaces `BertFlow` and `build_bert_built` from here.
42pub mod bert_flow {
43    pub use rlx_bert::flow::*;
44}
/// Glob re-export of `rlx_nomic::nomic`.
///
/// The crate root additionally surfaces `build_nomic_diagnostic_graph` and
/// `build_nomic_graph_sized` from here.
45pub mod nomic {
46    pub use rlx_nomic::nomic::*;
47}
/// Glob re-export of `rlx_nomic::flow`.
///
/// The crate root additionally surfaces `NomicFlow` and `build_nomic_built` from here.
48pub mod nomic_flow {
49    pub use rlx_nomic::flow::*;
50}
/// Glob re-export of `rlx_vision::vision`.
///
/// The crate root additionally surfaces `VisionPreprocessWeights` and
/// `build_vision_graph_sized` from here.
51pub mod vision {
52    pub use rlx_vision::vision::*;
53}
/// Glob re-export of `rlx_vision::flow`.
///
/// The crate root additionally surfaces `NomicVisionBuilt`, `NomicVisionFlow` and
/// `build_nomic_vision_built` from here.
54pub mod vision_flow {
55    pub use rlx_vision::flow::*;
56}
/// Glob re-export of the `rlx_dinov2` crate root.
///
/// The `DinoV2*` config/flow types and the `build_dinov2_*` builders are also lifted to this
/// crate's root.
57pub mod dinov2 {
58    pub use rlx_dinov2::*;
59}
/// Glob re-export of the `rlx_embed` crate root.
///
/// A selection of its items (e.g. `EmbeddingModel`, `RlxEmbed`, `compile_model`, `detect_arch`)
/// is also lifted to this crate's root.
60pub mod embed {
61    pub use rlx_embed::*;
62}
/// Glob re-export of the `rlx_flux2` crate root.
///
/// Many `Flux2*` types and helpers are also lifted to this crate's root; `Flux2Output`,
/// `Flux2Runner` and `Flux2RunnerBuilder` are re-exported there directly from `rlx_flux2`.
63pub mod flux2 {
64    pub use rlx_flux2::*;
65}
/// Glob re-export of the `rlx_diamond` crate root.
///
/// None of its items appear in this file's visible root-level re-export lists, so they are
/// reached through this module path.
66pub mod diamond {
67    pub use rlx_diamond::*;
68}
/// Glob re-export of the `rlx_qwen3` crate root.
///
/// The crate root additionally surfaces items such as `Qwen3Config`, `Qwen3Generator`,
/// `SampleOpts` and `sample_token` from here.
69pub mod qwen3 {
70    pub use rlx_qwen3::*;
71}
/// Glob re-export of the `rlx_qwen35` crate root.
///
/// A large selection is also lifted to this crate's root; note that `validate_device` is
/// re-exported there under its original name, and `synth` as `qwen35_synth`.
72pub mod qwen35 {
73    pub use rlx_qwen35::*;
74}
/// Glob re-export of the `rlx_llama32` crate root.
///
/// At this crate's root the generic helper names are prefixed to avoid clashes, e.g.
/// `encode_prompt` is exposed as `llama32_encode_prompt`.
75pub mod llama32 {
76    pub use rlx_llama32::*;
77}
/// Glob re-export of the `rlx_gemma` crate root.
///
/// At this crate's root the generic helper names are prefixed to avoid clashes, e.g.
/// `encode_prompt` is exposed as `gemma_encode_prompt`.
78pub mod gemma {
79    pub use rlx_gemma::*;
80}
/// Glob re-export of `rlx_llada2::llada2`.
///
/// At this crate's root, its `validate_device` is exposed as `validate_llada2_device`.
81pub mod llada2 {
82    pub use rlx_llada2::llada2::*;
83}
/// Glob re-export of `rlx_llada2::tide` (same source crate as the `llada2` module).
///
/// The crate root additionally surfaces items such as `TideRunner`, `GenerateConfig` and
/// `refresh_experts` from here.
84pub mod tide {
85    pub use rlx_llada2::tide::*;
86}
/// Glob re-export of the `rlx_sam` crate root.
///
/// At this crate's root several items are renamed with a `sam_`/`Sam` prefix, e.g.
/// `preprocess_image` becomes `sam_preprocess_image`.
87pub mod sam {
88    pub use rlx_sam::*;
89}
/// Glob re-export of the `rlx_sam2` crate root.
///
/// At this crate's root several items are renamed with a `sam2_`/`Sam2` prefix, e.g.
/// `preprocess_image` becomes `sam2_preprocess_image`.
90pub mod sam2 {
91    pub use rlx_sam2::*;
92}
/// Glob re-export of the `rlx_sam3` crate root.
///
/// At this crate's root `assemble_patch_tokens` and `preprocess_image` are exposed with a
/// `sam3_` prefix.
93pub mod sam3 {
94    pub use rlx_sam3::*;
95}
/// Glob re-export of the `rlx_vjepa2` crate root.
///
/// The `Vjepa2*` config/flow/weights types and helpers such as `encode_video_native` are also
/// lifted to this crate's root.
96pub mod vjepa2 {
97    pub use rlx_vjepa2::*;
98}
/// Glob re-export of the `rlx_wav2vec2_bert` crate root.
///
/// The crate root additionally surfaces items such as `Wav2Vec2BertConfig`, `LogMelExtractor`
/// and `load_wav_mono_f32` from here.
99pub mod wav2vec2_bert {
100    pub use rlx_wav2vec2_bert::*;
101}
/// Glob re-export of the `rlx_whisper` crate root.
///
/// The crate root additionally surfaces items such as `WhisperConfig`, `WhisperRunner` and
/// `pcm_to_mel` from here.
102pub mod whisper {
103    pub use rlx_whisper::*;
104}
/// Glob re-export of the `rlx_ocr` crate root.
///
/// This is the current path for OCR items; the deprecated `ocrs` module re-exports the same crate.
105pub mod ocr {
106    pub use rlx_ocr::*;
107}
/// Glob re-export of the `rlx_neutts` crate root.
///
/// A selection (e.g. `NeuTTS`, `GenerationConfig`, `build_prompt`) is also re-exported at this
/// crate's root directly from `rlx_neutts`.
108pub mod neutts {
109    pub use rlx_neutts::*;
110}
111pub use rlx_neutts::{
112    BackboneModel, DEFAULT_N_CTX, GenerationConfig, NeuCodecDecoder, NeuCodecEncoder, NeuTTS,
113    STOP_TOKEN, build_prompt, extract_ids,
114};
115
/// Deprecated alias of the `ocr` module: re-exports the same `rlx_ocr` crate root.
///
/// The deprecation note directs callers to `rlx_models::ocr`.
116#[deprecated(note = "use `rlx_models::ocr`")]
117pub mod ocrs {
118    pub use rlx_ocr::*;
119}
120
121// ── Stub families (PLAN.md M4 — no runner yet). Each exposes a
122// `*Runner::builder().build()` that returns an error pointing at the
123// milestone, so callers get a typed surface to wire against today.
/// Glob re-export of the `rlx_mistral` crate root.
///
/// Listed under the stub-family banner (PLAN.md M4): no runner yet.
124pub mod mistral {
125    pub use rlx_mistral::*;
126}
/// Glob re-export of the `rlx_bonsai` crate root.
///
/// Listed under the stub-family banner (PLAN.md M4): no runner yet.
127pub mod bonsai {
128    pub use rlx_bonsai::*;
129}
/// Glob re-export of the `rlx_phi` crate root.
///
/// Listed under the stub-family banner (PLAN.md M4): no runner yet.
130pub mod phi {
131    pub use rlx_phi::*;
132}
/// Glob re-export of the `rlx_omnicoder` crate root.
///
/// Listed under the stub-family banner (PLAN.md M4): no runner yet.
133pub mod omnicoder {
134    pub use rlx_omnicoder::*;
135}
/// Glob re-export of the `rlx_granite` crate root.
///
/// Listed under the stub-family banner (PLAN.md M4): no runner yet.
136pub mod granite {
137    pub use rlx_granite::*;
138}
/// Glob re-export of the `rlx_cohere` crate root.
///
/// Listed under the stub-family banner (PLAN.md M4): no runner yet.
139pub mod cohere {
140    pub use rlx_cohere::*;
141}
// NOTE(review): this and the following `*_ir` modules come right after the "Stub families"
// banner with no separator, but they re-export from `rlx_sam_ir`; presumably they are not
// stubs — confirm and consider a section comment here.
/// Glob re-export of `rlx_sam_ir::mask_hyper_matmul_ir`.
142pub mod mask_hyper_matmul_ir {
143    pub use rlx_sam_ir::mask_hyper_matmul_ir::*;
144}
/// Glob re-export of `rlx_sam_ir::mask_prompt_ir`.
145pub mod mask_prompt_ir {
146    pub use rlx_sam_ir::mask_prompt_ir::*;
147}
/// Glob re-export of `rlx_sam_ir::mlp_relu_ir`.
148pub mod mlp_relu_ir {
149    pub use rlx_sam_ir::mlp_relu_ir::*;
150}
/// Glob re-export of `rlx_sam_ir::twoway_transformer_ir`.
151pub mod twoway_transformer_ir {
152    pub use rlx_sam_ir::twoway_transformer_ir::*;
153}
154
/// Runner module defined in this crate; the crate root lifts its runner types and builders,
/// loader helpers (`open_loader*`) and the `dispatch` / `register_runner` functions.
155pub mod run;
// Private module; its contents are not visible from this file.
// NOTE(review): presumably backs the SAM runner types exposed through `run` — confirm.
156mod sam_runner;
157
158pub use rlx_core::flow_bridge::{
159    apply_compile_profile, compile_graph_encoder, compile_graph_legacy,
160    compile_graph_llama32_decode, compile_graph_llama32_prefill, compile_graph_qwen3_decode,
161    compile_graph_qwen3_prefill, compile_graph_qwen35_decode, compile_graph_qwen35_prefill,
162    compile_graph_sam, compile_graph_with_profile, load_compile_profile, profile_near_weights,
163};
164pub use rlx_core::flow_util::{
165    build_graph, built_from_graph, built_from_hir, built_from_hir_with_profile, compile_built,
166    compile_built_cpu, compile_graph_encoder_with_params, compile_graph_profile,
167    compile_graph_qwen3_prefill_with_params, compile_graph_qwen35_decode_with_params,
168    compile_graph_qwen35_prefill_with_params, compile_graph_sam_with_params, graph_from_built,
169    graph_from_hir,
170};
171
172pub use bert::{build_bert_graph, build_bert_graph_sized};
173pub use bert_flow::{BertFlow, build_bert_built};
174pub use dinov2::{
175    DinoV2Built, DinoV2Config, DinoV2Flow, DinoV2PreprocessWeights, build_dinov2_built,
176    build_dinov2_graph_sized,
177};
178pub use embed::{
179    Arch, BertTokenizer, EmbeddingModel, ImageEmbeddingModel, ModelArch, ModelInfo, Pooling,
180    RlxBertModel, RlxEmbed, RlxNomicModel, RlxVisionModel, TokenizedBatch, assemble_vision_hidden,
181    compile_model, detect_arch, embed_with_rlx, models_map,
182};
183pub use flux2::{
184    DEFAULT_TEXT_ENCODER_LAYERS, Flux2CfgCombineFlow, Flux2CfgCombineGraph, Flux2Checkpoint,
185    Flux2Config, Flux2Flow, Flux2ForwardBuilt, Flux2ForwardGraph, Flux2ForwardInput,
186    Flux2GraphParams, Flux2PromptOutput, Flux2Session, Flux2SessionCache, Flux2SessionKey,
187    Flux2TextEncoderBuilt, Flux2TextEncoderFlow, Flux2VaeConfig, Flux2VaeDecoderFlow,
188    Flux2VaeEncoderFlow, Flux2VaeGraph, Flux2VaeWeights, Flux2Weights, build_flux2_cfg_combine_hir,
189    build_flux2_forward_graph, build_flux2_forward_hir, build_flux2_minimal_graph,
190    build_flux2_minimal_hir, build_flux2_text_encoder_hir, cfg_combine, compile_flux2_cfg_combine,
191    compile_flux2_forward, compile_flux2_forward_via_flow, compile_flux2_minimal,
192    compile_flux2_text_encoder_hir, download_flux2_repo, encode_flux2_prompt,
193    encode_prompt_embeds_default_layers, encode_prompt_padded, extract_flux2_vae_weights,
194    extract_flux2_weights, extract_text_encoder_weights, flux2_decode_packed_latents,
195    flux2_prefers_compiled_hir, flux2_prefers_compiled_te, flux2_rgb_to_u8,
196    flux2_transformer_forward, host_temb, load_and_apply_flux2_lora, load_flux2_vae_weights,
197    load_flux2_weights, load_rgb_planar, load_text_encoder_weights, parse_lora_scale,
198    prepare_latent_ids, prepare_text_ids, prepare_weight_map, resolve_text_encoder_dir,
199    resolve_tokenizer_path, resolve_transformer_config, resolve_vae_dir, tiny_text_encoder_config,
200};
201pub use gemma::{
202    GemmaArch, GemmaConfig, GemmaFlow, GemmaGenerator, build_gemma_decode_graph_sized,
203    build_gemma_graph_sized, build_gemma_graph_sized_last_logits, build_gemma_graph_sized_packed,
204    encode_prompt as gemma_encode_prompt, encode_prompt_auto as gemma_encode_prompt_auto,
205    gemma_cfg_from_gguf, resolve_tokenizer_path as gemma_resolve_tokenizer_path,
206};
207pub use llada2::{
208    LLaDA2MoeConfig, LLaDA2Runner, LLaDA2RunnerBuilder, LLaDA2Weights, build_llada2_forward_graph,
209    default_memory_budget_bytes, validate_device as validate_llada2_device,
210};
211pub use llama32::{
212    Llama32Config, Llama32Flow, Llama32Generator, build_llama32_decode_graph_sized,
213    build_llama32_graph_sized, build_llama32_graph_sized_last_logits,
214    build_llama32_graph_sized_packed, encode_prompt as llama32_encode_prompt,
215    encode_prompt_auto as llama32_encode_prompt_auto, llama32_cfg_from_gguf,
216    resolve_tokenizer_path as llama32_resolve_tokenizer_path,
217};
218pub use nomic::{build_nomic_diagnostic_graph, build_nomic_graph_sized};
219pub use nomic_flow::{NomicFlow, build_nomic_built};
220pub use ocr::{
221    BLACK_VALUE, DEFAULT_ALPHABET, DecodeMethod, DetectionParams, DimOrder, HF_DETECTION_RTEN,
222    HF_DETECTION_ST, HF_RECOGNITION_RTEN, HF_RECOGNITION_ST, ImageSource, OcrConfig, OcrEngine,
223    OcrEngineParams, OcrInput, OcrOutput, OcrRunner, OcrRunnerBuilder, RotatedRect, TextChar,
224    TextLine, TextWord, resolve_model_dir,
225};
226pub use qwen3::{
227    Qwen3Config, Qwen3Flow, Qwen3Generator, Qwen3PrefillOpts, Qwen3Speculator, SampleOpts,
228    build_qwen3_graph_sized, build_qwen3_prefill_built, sample_token,
229};
230pub use qwen35::{
231    ChatMessage, ChatRole, MatWeight, Qwen35Config, Qwen35FullAttnLayer, Qwen35LayerFfn,
232    Qwen35LinearLayer, Qwen35MoeFfn, Qwen35MtpLayer, Qwen35PrefillOutput, Qwen35Runner,
233    Qwen35RunnerBuilder, Qwen35TrunkLayer, Qwen35Weights, build_qwen35_decode_graph,
234    build_qwen35_decode_hir_dynamic_ext, build_qwen35_graph_sized, build_qwen35_graph_sized_ext,
235    build_qwen35_graph_sized_stub, build_qwen35_prefill_cache_graph,
236    build_qwen35_prefill_cache_graph_ext, build_qwen35_prefill_cache_hir_dynamic_ext,
237    decode_step_feeds, encode_chat_auto, format_chatml, messages_from_prompt, mrope_prefill_feeds,
238    mrope_row_for_sections, mrope_slice_at_pos, mtp_draft_vocab_size, pack_input_ids,
239    parse_messages_json, recurrent_output_count, seed_cache_from_outputs,
240    supports_multimodal_mrope, synth as qwen35_synth, text_section_pos, validate_device,
241    zero_recurrent_inputs,
242};
243pub use rlx_flux2::{Flux2Output, Flux2Runner, Flux2RunnerBuilder};
244pub use run::{
245    ConfigSource, DinoV2Output, DinoV2Runner, DinoV2RunnerBuilder, DinoV2Variant, Llama32Runner,
246    Llama32RunnerBuilder, LmRunner, ModelRunner, Precision, Qwen3Runner, Qwen3RunnerBuilder,
247    SamArch, SamPredictionAny, SamRunner, SamRunnerBuilder, Vjepa2Output, Vjepa2PoolOutput,
248    Vjepa2PredictOutput, Vjepa2Runner, Vjepa2RunnerBuilder, Wav2Vec2BertRunner,
249    Wav2Vec2BertRunnerBuilder, WeightFormat, debug_resolve_name, dispatch, dispatch_help,
250    list_mtp_keys, open_gguf_loader, open_loader, open_loader_resolved, open_loader_with_format,
251    register_runner, registered_runners, run_registered,
252};
253pub use sam::{
254    NeckWeights as SamNeckWeights, SamConfig, SamEncoderBuilt, SamEncoderConfig, SamEncoderFlow,
255    SamPreprocessWeights, apply_neck_host as sam_apply_neck_host,
256    assemble_patch_tokens as sam_assemble_patch_tokens, build_sam_encoder_built,
257    build_sam_encoder_graph, preprocess_image as sam_preprocess_image,
258};
259pub use sam2::{
260    FpnLevel as Sam2FpnLevel, FpnNeckWeights as Sam2FpnNeckWeights, Sam2, Sam2Config,
261    Sam2DecoderConfig, Sam2FpnConfig, Sam2HieraConfig, Sam2ImageEncoderBuilt, Sam2ImageEncoderFlow,
262    Sam2ImagePrediction, Sam2MaskDecoderOutput, Sam2MaskDecoderWeights, Sam2MemoryAttentionWeights,
263    Sam2MemoryConfig, Sam2MemoryEncoderConfig, Sam2MemoryEncoderOutput, Sam2MemoryEncoderWeights,
264    Sam2PreprocessWeights, Sam2PromptEncoderOutput, Sam2PromptEncoderWeights,
265    Sam2TwoWayTransformerWeights, Sam2VideoState, apply_fpn_neck as sam2_apply_fpn_neck,
266    apply_fpn_neck_host as sam2_apply_fpn_neck_host,
267    assemble_patch_tokens as sam2_assemble_patch_tokens, build_sam2_image_encoder_built,
268    build_sam2_image_encoder_graph, mask_decoder_forward as sam2_mask_decoder_forward,
269    memory_attention_forward as sam2_memory_attention_forward,
270    memory_encoder_forward as sam2_memory_encoder_forward,
271    preprocess_image as sam2_preprocess_image,
272    prompt_encoder_forward as sam2_prompt_encoder_forward,
273    two_way_transformer_forward as sam2_two_way_transformer_forward,
274};
275pub use sam3::{
276    Sam3, Sam3CompiledDecoder, Sam3Config, Sam3DetectorConfig, Sam3DetectorDecoderBuilt,
277    Sam3DetectorDecoderFlow, Sam3DetectorEncoderFlow, Sam3EncodedImage, Sam3ImagePrediction,
278    Sam3PreprocessWeights, Sam3TextConfig, Sam3TrackerConfig, Sam3VideoFramePrediction,
279    Sam3VideoState, Sam3VitConfig, assemble_patch_tokens as sam3_assemble_patch_tokens,
280    build_sam3_detector_decoder_built, build_sam3_detector_encoder_built,
281    build_sam3_detector_encoder_graph, forward_decoder_ir_on,
282    preprocess_image as sam3_preprocess_image,
283};
284pub use tide::{
285    BlockDenoiseConfig, BlockDenoiseLoop, GenerateConfig, PredictiveOffloadInfo,
286    PredictiveOffloadParams, TideOffloadStats, TideRunner, aggregate_offload_stats,
287    refresh_experts,
288};
289pub use vision::{VisionPreprocessWeights, build_vision_graph_sized};
290pub use vision_flow::{NomicVisionBuilt, NomicVisionFlow, build_nomic_vision_built};
291pub use vjepa2::{
292    Vjepa2Config, Vjepa2EncoderBuilt, Vjepa2EncoderFlow, Vjepa2EncoderOutput, Vjepa2EncoderWeights,
293    Vjepa2Masks, Vjepa2ModelWeights, Vjepa2PatchEmbedWeights, Vjepa2PoolerFlow,
294    Vjepa2PoolerWeights, Vjepa2PredictorFlow, Vjepa2PredictorWeights,
295    build_vjepa2_encoder_graph_sized, conv3d_patch_embed, encode_video_native,
296    extract_encoder_weights, extract_model_weights, extract_patch_embed_weights,
297    extract_pooler_weights, extract_predictor_weights, normalize_video_hwc, pool_native,
298    predict_native,
299};
300pub use wav2vec2_bert::{
301    LogMelExtractor, LogMelFeatures, Wav2Vec2BertConfig, Wav2Vec2BertFlow,
302    Wav2Vec2BertPreprocessConfig, build_wav2vec2_bert_built, build_wav2vec2_bert_graph_sized,
303    load_wav_mono_f32,
304};
305pub use whisper::{
306    MelSpectrogram, WhisperConfig, WhisperDecoderFlow, WhisperEncoderFlow, WhisperKvCache,
307    WhisperRunner, WhisperRunnerBuilder, WhisperWeightPrefix, build_whisper_decode_step_built,
308    build_whisper_decoder_built, build_whisper_decoder_graph_sized,
309    build_whisper_decoder_prefill_built, build_whisper_encoder_built,
310    build_whisper_encoder_graph_sized, default_mel_frames, pcm_to_mel,
311};
rlx_models/lib.rs

rlx_models/
lib.rs