Skip to main content

rlx_models_core/
lib.rs

1// RLX — versatile ML compiler + runtime.
2// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, version 3.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16//! Shared infrastructure for RLX model crates: HuggingFace config parsing,
17//! safetensors / GGUF weight loading, tier-1 compile profile helpers, and
18//! packed GGUF prefill guards ([`flow_bridge::packed_gguf_compile_guard`], etc.).
19
// Public submodules of this crate. Every module is declared `pub`, so each one
// is reachable by its full path; a subset of their items is additionally
// re-exported at the crate root by the `pub use` statements further down.
20pub mod arch_registry;
21pub mod autoregressive;
22pub mod config;
23pub mod dataprocessing;
24pub mod device_capabilities;
25pub mod embedded_safetensors;
26pub mod flow_bridge;
27pub mod flow_util;
28pub mod gguf_config;
29pub mod gguf_resolve;
30pub mod gguf_support;
31pub mod gpu_kv;
32pub mod lm;
33pub mod moe_weights;
34pub mod safetensors_checkpoint;
35pub mod vision_ops_ir;
36pub mod weight_loader;
37pub mod weight_map;
38pub mod weight_registry;
39pub mod weights;
40
// Crate-root re-exports. Everything below is surfaced at the top level so
// downstream code can import these items without naming the submodule.
// Submodules with no `pub use` here (`arch_registry`, `dataprocessing`,
// `moe_weights`, `vision_ops_ir`) are only reachable through their module path.

// Re-exports from `device_capabilities`.
41pub use device_capabilities::{
42    STANDARD_DEVICE_NAMES, STANDARD_DEVICES, device_memory_for_moe_offload, is_standard_device,
43    validate_sam_device, validate_standard_device,
44};
45
// Re-exports from `gguf_config`: the `*_GGUF_ARCHES` items, the matching
// `is_*_gguf_arch` items, and the remaining GGUF metadata items.
46pub use gguf_config::{
47    DINOV2_GGUF_ARCHES, EMBED_GGUF_ARCHES, EmbedGgufKind, FLUX_GGUF_ARCHES, GgufMemoryFootprint,
48    SAM_GGUF_ARCHES, SAM2_GGUF_ARCHES, SAM3_GGUF_ARCHES, VJEPA2_GGUF_ARCHES, W2V_BERT_GGUF_ARCHES,
49    embed_gguf_kind, gguf_memory_footprint, gguf_meta_u32, gguf_runner_hint, is_dinov2_gguf_arch,
50    is_embed_gguf_arch, is_flux_gguf_arch, is_sam_gguf_arch, is_sam2_gguf_arch, is_sam3_gguf_arch,
51    is_vjepa2_gguf_arch, is_w2v_bert_gguf_arch,
52};
// Re-exports from `gguf_resolve`.
// NOTE(review): `gguf_resolve::ensure_builtin_resolvers` is re-exported in a
// separate `pub use` further down; the two statements could be merged.
53pub use gguf_resolve::{
54    GgufTensorNameResolver, LlamaFamilyGgufResolver, PassThroughGgufResolver,
55    PrefixStripGgufResolver, Qwen35NativeGgufResolver, register_gguf_tensor_resolver,
56    resolve_gguf_tensor_name,
57};
// Re-exports from `gguf_support`.
// NOTE(review): `gguf_support::DEFAULT_GGUF_PREFER_SUBSTR` is re-exported in a
// separate `pub use` further down; the two statements could be merged.
58pub use gguf_support::{
59    GgufModelFamily, ResolveWeightsOptions, assert_gguf_family, gguf_architecture_from_path,
60    gguf_architecture_str, gguf_f32_bytes_estimate, gguf_family_for_arch,
61    gguf_safetensors_only_hint, gguf_split_hint, gguf_split_siblings, gguf_validate_arch,
62    list_gguf_files_in_dir, load_gguf_file, resolve_weights_file,
63    resolve_weights_file_with_options,
64};
65
// Re-exports from `autoregressive`.
66pub use autoregressive::{
67    KvCacheState, compile_cache_ensure_graph, infer_prefill_kv_seq, kv_from_prefill_outputs,
68    kv_from_prefill_outputs_per_layer, packed_prefill_active_extent_enabled, past_kv_input_names,
69    prefill_cache_key, run_bucketed_kv_decode, run_bucketed_kv_decode_graph_layers_scratch,
70    run_bucketed_kv_decode_hir, run_bucketed_kv_decode_hir_scratch,
71    run_bucketed_kv_decode_hir_uniform, run_bucketed_kv_decode_keyed, run_packed_prefill,
72    split_bucketed_decode_kv, split_bucketed_decode_kv_per_layer, split_decode_logits_kv,
73};
// Config types re-exported from `config`.
74pub use config::{BertConfig, NomicBertConfig, NomicVisionConfig};
75pub use embedded_safetensors::EmbeddedSafetensors;
// Re-exports from `flow_bridge`, including `packed_gguf_compile_guard`, which
// the crate-level docs cite as one of the packed GGUF prefill guards.
76pub use flow_bridge::{
77    apply_compile_profile, compile_graph_encoder, compile_graph_gemma_decode,
78    compile_graph_gemma_prefill, compile_graph_legacy, compile_graph_llama32_decode,
79    compile_graph_llama32_prefill, compile_graph_qwen3_decode, compile_graph_qwen3_prefill,
80    compile_graph_qwen35_decode, compile_graph_qwen35_prefill, compile_graph_sam,
81    compile_graph_with_profile, compile_options_for_packed_gguf_prefill,
82    compile_options_for_packed_gguf_prefill_with_profile, compile_options_for_profile,
83    load_compile_profile, packed_gguf_compile_guard, packed_gguf_execution_device,
84    profile_near_weights,
85};
// Re-exports from `flow_util`. Several names mirror `flow_bridge` items with a
// `_with_params` suffix — presumably parameterised variants; verify in the module.
86pub use flow_util::{
87    WeightMapSource, bucket_cache_ensure_built, build_graph, built_from_graph, built_from_hir,
88    built_from_hir_with_profile, compile_built, compile_built_cpu, compile_cache_ensure_built,
89    compile_cache_ensure_built_with_options, compile_graph_encoder_with_params,
90    compile_graph_gemma_decode_with_params, compile_graph_gemma_prefill_with_params,
91    compile_graph_profile, compile_graph_qwen3_prefill_with_params,
92    compile_graph_qwen35_decode_with_params, compile_graph_qwen35_prefill_with_params,
93    compile_graph_sam_with_params, compile_graph_with_kv_export_params, graph_from_built,
94    graph_from_hir,
95};
// Second, single-item re-exports from `gguf_resolve` and `gguf_support`; the
// larger groups from the same modules appear earlier in this file.
96pub use gguf_resolve::ensure_builtin_resolvers;
97pub use gguf_support::DEFAULT_GGUF_PREFER_SUBSTR;
// Re-exports from `gpu_kv`.
98pub use gpu_kv::{
99    GpuKvBinding, GpuKvCacheSet, cross_attn_gpu_handles_ready, device_supports_gpu_kv,
100    install_cross_attn_gpu_handles, install_gpu_kv_handles, reinstall_gpu_kv_handles,
101    run_bucketed_kv_decode_gpu, run_bucketed_kv_decode_gpu_hir, run_bucketed_kv_mtp_gpu,
102    sync_gpu_kv_to_host,
103};
104pub use lm::{FlowBuildExt, into_compile_parts};
105pub use safetensors_checkpoint::SafetensorsCheckpoint;
// Re-exports from `weight_loader`.
106pub use weight_loader::{
107    GgufLoader, HfTranslatingLoader, WeightLoader, dequant_matmul_supported,
108    ggml_type_to_quant_scheme, gguf_to_hf_name, gguf_to_hf_name_for_arch, hf_to_gguf_name,
109    is_mtp_weight, load_from_path,
110};
111pub use weight_map::{WeightDrainPolicy, WeightMap};
// Re-exports from `weight_registry`.
112pub use weight_registry::{
113    LoadWeightsOptions, LoadedWeights, RegisteredFormat, WeightFormatRegistration,
114    format_for_extension, list_registered_formats, load_weight_map_resolved, load_weights_resolved,
115    open_weight_loader, register_weight_format, registered_extensions_hint,
116};
// Re-exports from `weights`. Note the very short names (`init`, `open`, `pick`)
// become crate-root items, so callers will typically want to path-qualify them.
117pub use weights::{
118    GgufDirGuide, LoadOpts, ResolveOpts, default_resolve_opts, gguf_dir_guide, init,
119    load_weight_map, open, open_map, open_map_with, open_with, pick, pick_default,
120};