rlx_models/
lib.rs

1// RLX — versatile ML compiler + runtime.
2// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, version 3.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16//! RLX model loading — parse configs, load weights, build IR graphs.
17//!
18//! This crate is a thin facade over per-model workspace members (`rlx-qwen3`,
19//! `rlx-sam`, …). Depend on a specific model crate directly when you only need
20//! one family.
21
22pub use rlx_core::{
23    BertConfig, EmbedGgufKind, FlowBuildExt, GgufDirGuide, GgufModelFamily, GgufTensorNameResolver,
24    LlamaFamilyGgufResolver, LoadOpts, LoadWeightsOptions, LoadedWeights, NomicBertConfig,
25    NomicVisionConfig, PassThroughGgufResolver, Qwen35NativeGgufResolver, RegisteredFormat,
26    ResolveOpts, ResolveWeightsOptions, STANDARD_DEVICE_NAMES, WeightDrainPolicy,
27    WeightFormatRegistration, WeightLoader, WeightMap, WeightMapSource, arch_registry,
28    assert_gguf_family, config, dataprocessing, flow_bridge, flow_util, format_for_extension,
29    gguf_architecture_str, gguf_dir_guide, gguf_f32_bytes_estimate, gguf_family_for_arch,
30    gguf_resolve, gguf_runner_hint, gguf_support, into_compile_parts, is_standard_device,
31    list_registered_formats, lm, load_from_path, load_weight_map_resolved, load_weights_resolved,
32    open as open_weights, open_map, open_map_with, open_with, register_gguf_tensor_resolver,
33    register_weight_format, resolve_weights_file, resolve_weights_file_with_options,
34    validate_sam_device, validate_standard_device, vision_ops_ir, weight_loader, weight_map,
35    weight_registry, weights,
36};
37pub use rlx_flow::{BuiltModel, CompileProfile};
38
/// Glob re-export of the public items of `rlx_bert::bert`.
///
/// The crate root additionally surfaces `build_bert_graph` and
/// `build_bert_graph_sized` from this module.
39pub mod bert {
40    pub use rlx_bert::bert::*;
41}
/// Glob re-export of the public items of `rlx_bert::flow`.
///
/// The crate root additionally surfaces `BertFlow` and `build_bert_built`
/// from this module.
42pub mod bert_flow {
43    pub use rlx_bert::flow::*;
44}
/// Glob re-export of the public items of the `rlx_clinicalbert` crate.
///
/// Nothing from this module is re-exported at the crate root in this file;
/// reach its items through the `clinicalbert::` path.
45pub mod clinicalbert {
46    pub use rlx_clinicalbert::*;
47}
/// Glob re-export of the public items of `rlx_nomic::nomic`.
///
/// The crate root additionally surfaces `build_nomic_diagnostic_graph` and
/// `build_nomic_graph_sized` from this module.
48pub mod nomic {
49    pub use rlx_nomic::nomic::*;
50}
/// Glob re-export of the public items of `rlx_nomic::flow`.
///
/// The crate root additionally surfaces `NomicFlow` and `build_nomic_built`
/// from this module.
51pub mod nomic_flow {
52    pub use rlx_nomic::flow::*;
53}
/// Glob re-export of the public items of `rlx_vision::vision`.
///
/// The crate root additionally surfaces `VisionPreprocessWeights` and
/// `build_vision_graph_sized` from this module.
54pub mod vision {
55    pub use rlx_vision::vision::*;
56}
/// Glob re-export of the public items of `rlx_vision::flow`.
///
/// The crate root additionally surfaces `NomicVisionBuilt`, `NomicVisionFlow`
/// and `build_nomic_vision_built` from this module.
57pub mod vision_flow {
58    pub use rlx_vision::flow::*;
59}
/// Glob re-export of the public items of the `rlx_dinov2` crate.
///
/// The crate root additionally surfaces a selection from this module:
/// `DinoV2Built`, `DinoV2Config`, `DinoV2Flow`, `DinoV2PreprocessWeights`,
/// `build_dinov2_built` and `build_dinov2_graph_sized`.
60pub mod dinov2 {
61    pub use rlx_dinov2::*;
62}
/// Glob re-export of the public items of the `rlx_embed` crate.
///
/// The crate root additionally surfaces a selection from this module
/// (for example `EmbeddingModel`, `RlxEmbed`, `compile_model`, `detect_arch`).
63pub mod embed {
64    pub use rlx_embed::*;
65}
/// Glob re-export of the public items of the `rlx_flux2` crate.
///
/// The crate root additionally surfaces a long selection from this module
/// (configs, flows, weight helpers, `compile_flux2_*`, ...). Note that
/// `Flux2Output`, `Flux2Runner` and `Flux2RunnerBuilder` are re-exported at the
/// root straight from `rlx_flux2` rather than through this module path.
66pub mod flux2 {
67    pub use rlx_flux2::*;
68}
/// Glob re-export of the public items of the `rlx_diamond` crate.
///
/// Nothing from this module is re-exported at the crate root in this file;
/// reach its items through the `diamond::` path.
69pub mod diamond {
70    pub use rlx_diamond::*;
71}
/// Glob re-export of the public items of the `rlx_qwen3` crate.
///
/// The crate root additionally surfaces `Qwen3Config`, `Qwen3Flow`,
/// `Qwen3Generator`, `Qwen3PrefillOpts`, `Qwen3Speculator`, `SampleOpts`,
/// `build_qwen3_graph_sized`, `build_qwen3_prefill_built` and `sample_token`
/// from this module.
72pub mod qwen3 {
73    pub use rlx_qwen3::*;
74}
/// Glob re-export of the public items of the `rlx_qwen35` crate.
///
/// The crate root additionally surfaces a selection from this module. Of
/// those, `synth` is renamed to `qwen35_synth`, while `validate_device` is
/// exported at the root under its original, unprefixed name.
75pub mod qwen35 {
76    pub use rlx_qwen35::*;
77}
/// Glob re-export of the public items of the `rlx_llama32` crate.
///
/// The crate root additionally surfaces a selection from this module; the
/// generically named helpers are renamed there with a `llama32_` prefix
/// (`llama32_encode_prompt`, `llama32_encode_prompt_auto`,
/// `llama32_resolve_tokenizer_path`).
78pub mod llama32 {
79    pub use rlx_llama32::*;
80}
/// Glob re-export of the public items of the `rlx_gemma` crate.
///
/// The crate root additionally surfaces a selection from this module; the
/// generically named helpers are renamed there with a `gemma_` prefix
/// (`gemma_encode_prompt`, `gemma_encode_prompt_auto`,
/// `gemma_resolve_tokenizer_path`).
81pub mod gemma {
82    pub use rlx_gemma::*;
83}
/// Glob re-export of the public items of `rlx_llada2::llada2`.
///
/// The crate root additionally surfaces `LLaDA2MoeConfig`, `LLaDA2Runner`,
/// `LLaDA2RunnerBuilder`, `LLaDA2Weights`, `build_llada2_forward_graph` and
/// `default_memory_budget_bytes` from this module, plus `validate_device`
/// renamed to `validate_llada2_device`.
84pub mod llada2 {
85    pub use rlx_llada2::llada2::*;
86}
/// Glob re-export of the public items of `rlx_llada2::tide`.
///
/// This module is sourced from the `rlx_llada2` crate, the same crate that
/// backs the `llada2` module. The crate root additionally surfaces
/// `TideRunner`, `BlockDenoiseConfig`, `BlockDenoiseLoop`, `GenerateConfig`
/// and the offload-related items from here.
87pub mod tide {
88    pub use rlx_llada2::tide::*;
89}
/// Glob re-export of the public items of the `rlx_sam` crate.
///
/// The crate root additionally surfaces a selection from this module; several
/// names are renamed there with a `sam_`/`Sam` prefix (`SamNeckWeights`,
/// `sam_apply_neck_host`, `sam_assemble_patch_tokens`, `sam_preprocess_image`).
90pub mod sam {
91    pub use rlx_sam::*;
92}
/// Glob re-export of the public items of the `rlx_sam2` crate.
///
/// The crate root additionally surfaces a selection from this module; the
/// generically named items are renamed there with a `sam2_`/`Sam2` prefix
/// (for example `Sam2FpnLevel`, `sam2_preprocess_image`,
/// `sam2_mask_decoder_forward`).
93pub mod sam2 {
94    pub use rlx_sam2::*;
95}
/// Glob re-export of the public items of the `rlx_sam3` crate.
///
/// The crate root additionally surfaces a selection from this module;
/// `assemble_patch_tokens` and `preprocess_image` are renamed there to
/// `sam3_assemble_patch_tokens` and `sam3_preprocess_image`.
96pub mod sam3 {
97    pub use rlx_sam3::*;
98}
/// Glob re-export of the public items of the `rlx_vjepa2` crate.
///
/// The crate root additionally surfaces a selection from this module
/// (`Vjepa2Config`, the `Vjepa2*Flow` and `Vjepa2*Weights` types, the
/// `extract_*_weights` helpers, `encode_video_native`, `pool_native`,
/// `predict_native`, ...).
99pub mod vjepa2 {
100    pub use rlx_vjepa2::*;
101}
/// Glob re-export of the public items of the `rlx_wav2vec2_bert` crate.
///
/// The crate root additionally surfaces `LogMelExtractor`, `LogMelFeatures`,
/// `Wav2Vec2BertConfig`, `Wav2Vec2BertFlow`, `Wav2Vec2BertPreprocessConfig`,
/// `build_wav2vec2_bert_built`, `build_wav2vec2_bert_graph_sized` and
/// `load_wav_mono_f32` from this module.
102pub mod wav2vec2_bert {
103    pub use rlx_wav2vec2_bert::*;
104}
/// Glob re-export of the public items of the `rlx_whisper` crate.
///
/// The crate root additionally surfaces a selection from this module. Its
/// `MelSpectrogram` and `pcm_to_mel` are exported at the root under their
/// original names, whereas the same-named `voxtral` items are renamed there
/// (`VoxtralMel`, `voxtral_pcm_to_mel`).
105pub mod whisper {
106    pub use rlx_whisper::*;
107}
/// Glob re-export of the public items of the `rlx_vad` crate.
///
/// Nothing from this module is re-exported at the crate root in this file;
/// reach its items through the `vad::` path.
108pub mod vad {
109    pub use rlx_vad::*;
110}
/// Glob re-export of the public items of the `rlx_voxtral` crate.
///
/// The crate root additionally surfaces a selection from this module, with
/// three renames: `FAMILY` -> `VOXTRAL_FAMILY`, `MelSpectrogram` ->
/// `VoxtralMel`, and `pcm_to_mel` -> `voxtral_pcm_to_mel`.
111pub mod voxtral {
112    pub use rlx_voxtral::*;
113}
/// Glob re-export of the public items of the `rlx_voxtral_tts` crate.
///
/// The crate root additionally surfaces a selection from this module, with
/// `HF_MODEL_ID` renamed to `VOXTRAL_TTS_HF_MODEL_ID` and `PRESET_VOICES`
/// renamed to `VOXTRAL_TTS_VOICES`.
114pub mod voxtral_tts {
115    pub use rlx_voxtral_tts::*;
116}
117
/// Glob re-export of the public items of the `rlx_qwen3_tts` crate.
///
/// The crate root additionally surfaces a selection from this module, with
/// `HF_MODEL_ID_06B_CUSTOM` renamed to `QWEN3_TTS_HF_MODEL_ID` and
/// `PRESET_SPEAKERS` renamed to `QWEN3_TTS_SPEAKERS`.
118pub mod qwen3_tts {
119    pub use rlx_qwen3_tts::*;
120}
/// Glob re-export of the public items of the `rlx_locateanything` crate.
///
/// Nothing from this module is re-exported at the crate root in this file;
/// reach its items through the `locateanything::` path.
121pub mod locateanything {
122    pub use rlx_locateanything::*;
123}
/// Glob re-export of the public items of the `rlx_ocr` crate.
///
/// This is the current path for OCR items; the deprecated `ocrs` module in
/// this file re-exports the same crate. The crate root additionally surfaces
/// a selection from this module (`OcrRunner`, `OcrEngine`, `OcrConfig`, ...).
124pub mod ocr {
125    pub use rlx_ocr::*;
126}
/// Glob re-export of the public items of the `rlx_neutts` crate.
///
/// The root-level NeuTTS names (`NeuTTS`, `BackboneModel`, `GenerationConfig`,
/// ...) are re-exported in this file straight from `rlx_neutts`, not through
/// this module path.
127pub mod neutts {
128    pub use rlx_neutts::*;
129}
/// Glob re-export of the public items of the `rlx_kittentts` crate.
///
/// Nothing from this module is re-exported at the crate root in this file;
/// reach its items through the `kittentts::` path.
130pub mod kittentts {
131    pub use rlx_kittentts::*;
132}
133pub use rlx_neutts::{
134    BackboneModel, DEFAULT_N_CTX, GenerationConfig, NeuCodecDecoder, NeuCodecEncoder, NeuTTS,
135    STOP_TOKEN, build_prompt, extract_ids,
136};
137
/// Deprecated alias module: re-exports the same `rlx_ocr` items as the `ocr`
/// module. The `#[deprecated]` note directs callers to `rlx_models::ocr`.
138#[deprecated(note = "use `rlx_models::ocr`")]
139pub mod ocrs {
140    pub use rlx_ocr::*;
141}
142
143// ── Stub families (PLAN.md M4 — no runner yet). Each exposes a
144// `*Runner::builder().build()` that returns an error pointing at the
145// milestone, so callers get a typed surface to wire against today.
/// Glob re-export of the public items of the `rlx_mistral` crate.
///
/// First module under the "Stub families" section comment, which states that
/// such families have no runner yet and that their `*Runner::builder().build()`
/// returns an error pointing at the PLAN.md M4 milestone.
146pub mod mistral {
147    pub use rlx_mistral::*;
148}
/// Glob re-export of the public items of the `rlx_bonsai` crate.
// NOTE(review): presumably one of the "Stub families" (PLAN.md M4, no runner
// yet) named by the section comment earlier in this file — confirm.
149pub mod bonsai {
150    pub use rlx_bonsai::*;
151}
/// Glob re-export of the public items of the `rlx_minicpm5` crate.
// NOTE(review): presumably one of the "Stub families" (PLAN.md M4, no runner
// yet) named by the section comment earlier in this file — confirm.
152pub mod minicpm5 {
153    pub use rlx_minicpm5::*;
154}
/// Glob re-export of the public items of the `rlx_phi` crate.
// NOTE(review): presumably one of the "Stub families" (PLAN.md M4, no runner
// yet) named by the section comment earlier in this file — confirm.
155pub mod phi {
156    pub use rlx_phi::*;
157}
/// Glob re-export of the public items of the `rlx_omnicoder` crate.
// NOTE(review): presumably one of the "Stub families" (PLAN.md M4, no runner
// yet) named by the section comment earlier in this file — confirm.
158pub mod omnicoder {
159    pub use rlx_omnicoder::*;
160}
/// Glob re-export of the public items of the `rlx_granite` crate.
// NOTE(review): presumably one of the "Stub families" (PLAN.md M4, no runner
// yet) named by the section comment earlier in this file — confirm.
161pub mod granite {
162    pub use rlx_granite::*;
163}
/// Glob re-export of the public items of the `rlx_cohere` crate.
// NOTE(review): presumably the last of the "Stub families" (PLAN.md M4, no
// runner yet) named by the section comment earlier in this file — confirm.
164pub mod cohere {
165    pub use rlx_cohere::*;
166}
/// Glob re-export of the public items of `rlx_sam_ir::mask_hyper_matmul_ir`.
// NOTE(review): this module follows the "Stub families" section without a
// separator, but it is sourced from `rlx_sam_ir` rather than a per-family
// crate; presumably it is not a stub — confirm and consider a section header.
167pub mod mask_hyper_matmul_ir {
168    pub use rlx_sam_ir::mask_hyper_matmul_ir::*;
169}
/// Glob re-export of the public items of `rlx_sam_ir::mask_prompt_ir`.
///
/// Nothing from this module is re-exported at the crate root in this file.
170pub mod mask_prompt_ir {
171    pub use rlx_sam_ir::mask_prompt_ir::*;
172}
/// Glob re-export of the public items of `rlx_sam_ir::mlp_relu_ir`.
///
/// Nothing from this module is re-exported at the crate root in this file.
173pub mod mlp_relu_ir {
174    pub use rlx_sam_ir::mlp_relu_ir::*;
175}
/// Glob re-export of the public items of `rlx_sam_ir::twoway_transformer_ir`.
///
/// Nothing from this module is re-exported at the crate root in this file.
176pub mod twoway_transformer_ir {
177    pub use rlx_sam_ir::twoway_transformer_ir::*;
178}
179
// Public submodule declared here and defined in a separate source file (not
// shown). The crate root re-exports its runner-facing names, e.g.
// `ModelRunner`, `LmRunner`, `SamRunner`, `dispatch`, `register_runner`,
// `open_loader`.
180pub mod run;
// Private submodule; no item in this file refers to it by path.
// NOTE(review): presumably backs the `SamRunner` types exported via `run` —
// confirm against run.rs / sam_runner.rs.
181mod sam_runner;
182
183pub use rlx_core::flow_bridge::{
184    apply_compile_profile, compile_graph_encoder, compile_graph_legacy,
185    compile_graph_llama32_decode, compile_graph_llama32_prefill, compile_graph_qwen3_decode,
186    compile_graph_qwen3_prefill, compile_graph_qwen35_decode, compile_graph_qwen35_prefill,
187    compile_graph_sam, compile_graph_with_profile, load_compile_profile, profile_near_weights,
188};
189pub use rlx_core::flow_util::{
190    build_graph, built_from_graph, built_from_hir, built_from_hir_with_profile, compile_built,
191    compile_built_cpu, compile_graph_encoder_with_params, compile_graph_profile,
192    compile_graph_qwen3_prefill_with_params, compile_graph_qwen35_decode_with_params,
193    compile_graph_qwen35_prefill_with_params, compile_graph_sam_with_params, graph_from_built,
194    graph_from_hir,
195};
196
197pub use bert::{build_bert_graph, build_bert_graph_sized};
198pub use bert_flow::{BertFlow, build_bert_built};
199pub use dinov2::{
200    DinoV2Built, DinoV2Config, DinoV2Flow, DinoV2PreprocessWeights, build_dinov2_built,
201    build_dinov2_graph_sized,
202};
203pub use embed::{
204    Arch, BertTokenizer, EmbeddingModel, ImageEmbeddingModel, ModelArch, ModelInfo, Pooling,
205    RlxBertModel, RlxEmbed, RlxNomicModel, RlxVisionModel, TokenizedBatch, assemble_vision_hidden,
206    compile_model, detect_arch, embed_with_rlx, models_map,
207};
208pub use flux2::{
209    DEFAULT_TEXT_ENCODER_LAYERS, Flux2CfgCombineFlow, Flux2CfgCombineGraph, Flux2Checkpoint,
210    Flux2Config, Flux2Flow, Flux2ForwardBuilt, Flux2ForwardGraph, Flux2ForwardInput,
211    Flux2GraphParams, Flux2PromptOutput, Flux2Session, Flux2SessionCache, Flux2SessionKey,
212    Flux2TextEncoderBuilt, Flux2TextEncoderFlow, Flux2VaeConfig, Flux2VaeDecoderFlow,
213    Flux2VaeEncoderFlow, Flux2VaeGraph, Flux2VaeWeights, Flux2Weights, build_flux2_cfg_combine_hir,
214    build_flux2_forward_graph, build_flux2_forward_hir, build_flux2_minimal_graph,
215    build_flux2_minimal_hir, build_flux2_text_encoder_hir, cfg_combine, compile_flux2_cfg_combine,
216    compile_flux2_forward, compile_flux2_forward_via_flow, compile_flux2_minimal,
217    compile_flux2_text_encoder_hir, download_flux2_repo, encode_flux2_prompt,
218    encode_prompt_embeds_default_layers, encode_prompt_padded, extract_flux2_vae_weights,
219    extract_flux2_weights, extract_text_encoder_weights, flux2_decode_packed_latents,
220    flux2_prefers_compiled_hir, flux2_prefers_compiled_te, flux2_rgb_to_u8,
221    flux2_transformer_forward, host_temb, load_and_apply_flux2_lora, load_flux2_vae_weights,
222    load_flux2_weights, load_rgb_planar, load_text_encoder_weights, parse_lora_scale,
223    prepare_latent_ids, prepare_text_ids, prepare_weight_map, resolve_text_encoder_dir,
224    resolve_tokenizer_path, resolve_transformer_config, resolve_vae_dir, tiny_text_encoder_config,
225};
226pub use gemma::{
227    GemmaArch, GemmaConfig, GemmaFlow, GemmaGenerator, build_gemma_decode_graph_sized,
228    build_gemma_graph_sized, build_gemma_graph_sized_last_logits, build_gemma_graph_sized_packed,
229    encode_prompt as gemma_encode_prompt, encode_prompt_auto as gemma_encode_prompt_auto,
230    gemma_cfg_from_gguf, resolve_tokenizer_path as gemma_resolve_tokenizer_path,
231};
232pub use llada2::{
233    LLaDA2MoeConfig, LLaDA2Runner, LLaDA2RunnerBuilder, LLaDA2Weights, build_llada2_forward_graph,
234    default_memory_budget_bytes, validate_device as validate_llada2_device,
235};
236pub use llama32::{
237    Llama32Config, Llama32Flow, Llama32Generator, build_llama32_decode_graph_sized,
238    build_llama32_graph_sized, build_llama32_graph_sized_last_logits,
239    build_llama32_graph_sized_packed, encode_prompt as llama32_encode_prompt,
240    encode_prompt_auto as llama32_encode_prompt_auto, llama32_cfg_from_gguf,
241    resolve_tokenizer_path as llama32_resolve_tokenizer_path,
242};
243pub use nomic::{build_nomic_diagnostic_graph, build_nomic_graph_sized};
244pub use nomic_flow::{NomicFlow, build_nomic_built};
245pub use ocr::{
246    BLACK_VALUE, DEFAULT_ALPHABET, DecodeMethod, DetectionParams, DimOrder, HF_DETECTION_RTEN,
247    HF_DETECTION_ST, HF_RECOGNITION_RTEN, HF_RECOGNITION_ST, ImageSource, OcrConfig, OcrEngine,
248    OcrEngineParams, OcrInput, OcrOutput, OcrRunner, OcrRunnerBuilder, RotatedRect, TextChar,
249    TextLine, TextWord, resolve_model_dir,
250};
251pub use qwen3::{
252    Qwen3Config, Qwen3Flow, Qwen3Generator, Qwen3PrefillOpts, Qwen3Speculator, SampleOpts,
253    build_qwen3_graph_sized, build_qwen3_prefill_built, sample_token,
254};
255pub use qwen3_tts::{
256    HF_MODEL_ID_06B_CUSTOM as QWEN3_TTS_HF_MODEL_ID, PRESET_SPEAKERS as QWEN3_TTS_SPEAKERS,
257    Qwen3TtsBenchReport, Qwen3TtsConfig, Qwen3TtsRunner, Qwen3TtsWeightStore, TalkerEngine,
258};
259pub use qwen35::{
260    ChatMessage, ChatRole, MatWeight, Qwen35Config, Qwen35FullAttnLayer, Qwen35LayerFfn,
261    Qwen35LinearLayer, Qwen35MoeFfn, Qwen35MtpLayer, Qwen35PrefillOutput, Qwen35Runner,
262    Qwen35RunnerBuilder, Qwen35TrunkLayer, Qwen35Weights, build_qwen35_decode_graph,
263    build_qwen35_decode_hir_dynamic_ext, build_qwen35_graph_sized, build_qwen35_graph_sized_ext,
264    build_qwen35_graph_sized_stub, build_qwen35_prefill_cache_graph,
265    build_qwen35_prefill_cache_graph_ext, build_qwen35_prefill_cache_hir_dynamic_ext,
266    decode_step_feeds, encode_chat_auto, format_chatml, messages_from_prompt, mrope_prefill_feeds,
267    mrope_row_for_sections, mrope_slice_at_pos, mtp_draft_vocab_size, pack_input_ids,
268    parse_messages_json, recurrent_output_count, seed_cache_from_outputs,
269    supports_multimodal_mrope, synth as qwen35_synth, text_section_pos, validate_device,
270    zero_recurrent_inputs,
271};
272pub use rlx_flux2::{Flux2Output, Flux2Runner, Flux2RunnerBuilder};
273pub use run::{
274    ConfigSource, DinoV2Output, DinoV2Runner, DinoV2RunnerBuilder, DinoV2Variant, Llama32Runner,
275    Llama32RunnerBuilder, LmRunner, ModelRunner, Precision, Qwen3Runner, Qwen3RunnerBuilder,
276    SamArch, SamPredictionAny, SamRunner, SamRunnerBuilder, Vjepa2Output, Vjepa2PoolOutput,
277    Vjepa2PredictOutput, Vjepa2Runner, Vjepa2RunnerBuilder, Wav2Vec2BertRunner,
278    Wav2Vec2BertRunnerBuilder, WeightFormat, debug_resolve_name, dispatch, dispatch_help,
279    list_mtp_keys, open_gguf_loader, open_loader, open_loader_resolved, open_loader_with_format,
280    register_runner, registered_runners, run_registered,
281};
282pub use sam::{
283    NeckWeights as SamNeckWeights, SamConfig, SamEncoderBuilt, SamEncoderConfig, SamEncoderFlow,
284    SamPreprocessWeights, apply_neck_host as sam_apply_neck_host,
285    assemble_patch_tokens as sam_assemble_patch_tokens, build_sam_encoder_built,
286    build_sam_encoder_graph, preprocess_image as sam_preprocess_image,
287};
288pub use sam2::{
289    FpnLevel as Sam2FpnLevel, FpnNeckWeights as Sam2FpnNeckWeights, Sam2, Sam2Config,
290    Sam2DecoderConfig, Sam2FpnConfig, Sam2HieraConfig, Sam2ImageEncoderBuilt, Sam2ImageEncoderFlow,
291    Sam2ImagePrediction, Sam2MaskDecoderOutput, Sam2MaskDecoderWeights, Sam2MemoryAttentionWeights,
292    Sam2MemoryConfig, Sam2MemoryEncoderConfig, Sam2MemoryEncoderOutput, Sam2MemoryEncoderWeights,
293    Sam2PreprocessWeights, Sam2PromptEncoderOutput, Sam2PromptEncoderWeights,
294    Sam2TwoWayTransformerWeights, Sam2VideoState, apply_fpn_neck as sam2_apply_fpn_neck,
295    apply_fpn_neck_host as sam2_apply_fpn_neck_host,
296    assemble_patch_tokens as sam2_assemble_patch_tokens, build_sam2_image_encoder_built,
297    build_sam2_image_encoder_graph, mask_decoder_forward as sam2_mask_decoder_forward,
298    memory_attention_forward as sam2_memory_attention_forward,
299    memory_encoder_forward as sam2_memory_encoder_forward,
300    preprocess_image as sam2_preprocess_image,
301    prompt_encoder_forward as sam2_prompt_encoder_forward,
302    two_way_transformer_forward as sam2_two_way_transformer_forward,
303};
304pub use sam3::{
305    Sam3, Sam3CompiledDecoder, Sam3Config, Sam3DetectorConfig, Sam3DetectorDecoderBuilt,
306    Sam3DetectorDecoderFlow, Sam3DetectorEncoderFlow, Sam3EncodedImage, Sam3ImagePrediction,
307    Sam3PreprocessWeights, Sam3TextConfig, Sam3TrackerConfig, Sam3VideoFramePrediction,
308    Sam3VideoState, Sam3VitConfig, assemble_patch_tokens as sam3_assemble_patch_tokens,
309    build_sam3_detector_decoder_built, build_sam3_detector_encoder_built,
310    build_sam3_detector_encoder_graph, forward_decoder_ir_on,
311    preprocess_image as sam3_preprocess_image,
312};
313pub use tide::{
314    BlockDenoiseConfig, BlockDenoiseLoop, GenerateConfig, PredictiveOffloadInfo,
315    PredictiveOffloadParams, TideOffloadStats, TideRunner, aggregate_offload_stats,
316    refresh_experts,
317};
318pub use vision::{VisionPreprocessWeights, build_vision_graph_sized};
319pub use vision_flow::{NomicVisionBuilt, NomicVisionFlow, build_nomic_vision_built};
320pub use vjepa2::{
321    Vjepa2Config, Vjepa2EncoderBuilt, Vjepa2EncoderFlow, Vjepa2EncoderOutput, Vjepa2EncoderWeights,
322    Vjepa2Masks, Vjepa2ModelWeights, Vjepa2PatchEmbedWeights, Vjepa2PoolerFlow,
323    Vjepa2PoolerWeights, Vjepa2PredictorFlow, Vjepa2PredictorWeights,
324    build_vjepa2_encoder_graph_sized, conv3d_patch_embed, encode_video_native,
325    extract_encoder_weights, extract_model_weights, extract_patch_embed_weights,
326    extract_pooler_weights, extract_predictor_weights, normalize_video_hwc, pool_native,
327    predict_native,
328};
329pub use voxtral::{
330    FAMILY as VOXTRAL_FAMILY, HF_MODEL_ID_MINI_3B, LanguageModelPrefixLoader,
331    MelSpectrogram as VoxtralMel, VoxtralAudioConfig, VoxtralConfig, VoxtralRunner,
332    VoxtralRunnerBuilder, VoxtralWeightPrefix, build_voxtral_decode_built,
333    build_voxtral_encoder_built, build_voxtral_prefill_built, build_voxtral_projector_built,
334    fuse_inputs_embeds, pcm_to_mel as voxtral_pcm_to_mel, transcription_prompt_ids,
335};
336pub use voxtral_tts::{
337    CodecDecoder, HF_MODEL_ID as VOXTRAL_TTS_HF_MODEL_ID, PRESET_VOICES as VOXTRAL_TTS_VOICES,
338    VoxtralTtsBenchReport, VoxtralTtsConfig, VoxtralTtsRunner, VoxtralTtsWeightStore,
339};
340pub use wav2vec2_bert::{
341    LogMelExtractor, LogMelFeatures, Wav2Vec2BertConfig, Wav2Vec2BertFlow,
342    Wav2Vec2BertPreprocessConfig, build_wav2vec2_bert_built, build_wav2vec2_bert_graph_sized,
343    load_wav_mono_f32,
344};
345pub use whisper::{
346    MelSpectrogram, WhisperConfig, WhisperDecoderFlow, WhisperEncoderFlow, WhisperKvCache,
347    WhisperRunner, WhisperRunnerBuilder, WhisperWeightPrefix, build_whisper_decode_step_built,
348    build_whisper_decoder_built, build_whisper_decoder_graph_sized,
349    build_whisper_decoder_prefill_built, build_whisper_encoder_built,
350    build_whisper_encoder_graph_sized, default_mel_frames, pcm_to_mel,
351};
rlx_models/lib.rs

rlx_models/
lib.rs