Skip to main content

Crate rlx_models_core

Crate rlx_models_core 

Source
Expand description

Shared infrastructure for RLX model crates: HuggingFace config parsing, safetensors / GGUF weight loading, tier-1 compile profile helpers, and packed GGUF prefill guards (`flow_bridge::packed_gguf_compile_guard`, etc.).

Re-exports

pub use device_capabilities::STANDARD_DEVICE_NAMES;
pub use device_capabilities::STANDARD_DEVICES;
pub use device_capabilities::device_memory_for_moe_offload;
pub use device_capabilities::is_standard_device;
pub use device_capabilities::validate_sam_device;
pub use device_capabilities::validate_standard_device;
pub use gguf_config::DINOV2_GGUF_ARCHES;
pub use gguf_config::EMBED_GGUF_ARCHES;
pub use gguf_config::EmbedGgufKind;
pub use gguf_config::FLUX_GGUF_ARCHES;
pub use gguf_config::GgufMemoryFootprint;
pub use gguf_config::SAM_GGUF_ARCHES;
pub use gguf_config::SAM2_GGUF_ARCHES;
pub use gguf_config::SAM3_GGUF_ARCHES;
pub use gguf_config::VJEPA2_GGUF_ARCHES;
pub use gguf_config::W2V_BERT_GGUF_ARCHES;
pub use gguf_config::embed_gguf_kind;
pub use gguf_config::gguf_memory_footprint;
pub use gguf_config::gguf_meta_u32;
pub use gguf_config::gguf_runner_hint;
pub use gguf_config::is_dinov2_gguf_arch;
pub use gguf_config::is_embed_gguf_arch;
pub use gguf_config::is_flux_gguf_arch;
pub use gguf_config::is_sam_gguf_arch;
pub use gguf_config::is_sam2_gguf_arch;
pub use gguf_config::is_sam3_gguf_arch;
pub use gguf_config::is_vjepa2_gguf_arch;
pub use gguf_config::is_w2v_bert_gguf_arch;
pub use gguf_resolve::GgufTensorNameResolver;
pub use gguf_resolve::LlamaFamilyGgufResolver;
pub use gguf_resolve::PassThroughGgufResolver;
pub use gguf_resolve::PrefixStripGgufResolver;
pub use gguf_resolve::Qwen35NativeGgufResolver;
pub use gguf_resolve::register_gguf_tensor_resolver;
pub use gguf_resolve::resolve_gguf_tensor_name;
pub use gguf_support::GgufModelFamily;
pub use gguf_support::ResolveWeightsOptions;
pub use gguf_support::assert_gguf_family;
pub use gguf_support::gguf_architecture_from_path;
pub use gguf_support::gguf_architecture_str;
pub use gguf_support::gguf_f32_bytes_estimate;
pub use gguf_support::gguf_family_for_arch;
pub use gguf_support::gguf_safetensors_only_hint;
pub use gguf_support::gguf_split_hint;
pub use gguf_support::gguf_split_siblings;
pub use gguf_support::gguf_validate_arch;
pub use gguf_support::list_gguf_files_in_dir;
pub use gguf_support::load_gguf_file;
pub use gguf_support::resolve_weights_file;
pub use gguf_support::resolve_weights_file_with_options;
pub use autoregressive::compile_cache_ensure_graph;
pub use autoregressive::kv_from_prefill_outputs;
pub use autoregressive::past_kv_input_names;
pub use autoregressive::prefill_cache_key;
pub use autoregressive::run_bucketed_kv_decode;
pub use autoregressive::run_bucketed_kv_decode_hir;
pub use autoregressive::split_bucketed_decode_kv;
pub use autoregressive::split_decode_logits_kv;
pub use config::BertConfig;
pub use config::NomicBertConfig;
pub use config::NomicVisionConfig;
pub use flow_bridge::apply_compile_profile;
pub use flow_bridge::compile_graph_encoder;
pub use flow_bridge::compile_graph_gemma_decode;
pub use flow_bridge::compile_graph_gemma_prefill;
pub use flow_bridge::compile_graph_legacy;
pub use flow_bridge::compile_graph_llama32_decode;
pub use flow_bridge::compile_graph_llama32_prefill;
pub use flow_bridge::compile_graph_qwen3_decode;
pub use flow_bridge::compile_graph_qwen3_prefill;
pub use flow_bridge::compile_graph_qwen35_decode;
pub use flow_bridge::compile_graph_qwen35_prefill;
pub use flow_bridge::compile_graph_sam;
pub use flow_bridge::compile_graph_with_profile;
pub use flow_bridge::compile_options_for_packed_gguf_prefill;
pub use flow_bridge::compile_options_for_packed_gguf_prefill_with_profile;
pub use flow_bridge::compile_options_for_profile;
pub use flow_bridge::load_compile_profile;
pub use flow_bridge::packed_gguf_compile_guard;
pub use flow_bridge::packed_gguf_execution_device;
pub use flow_bridge::profile_near_weights;
pub use flow_util::WeightMapSource;
pub use flow_util::bucket_cache_ensure_built;
pub use flow_util::build_graph;
pub use flow_util::built_from_graph;
pub use flow_util::built_from_hir;
pub use flow_util::built_from_hir_with_profile;
pub use flow_util::compile_built;
pub use flow_util::compile_built_cpu;
pub use flow_util::compile_cache_ensure_built;
pub use flow_util::compile_graph_encoder_with_params;
pub use flow_util::compile_graph_gemma_decode_with_params;
pub use flow_util::compile_graph_gemma_prefill_with_params;
pub use flow_util::compile_graph_profile;
pub use flow_util::compile_graph_qwen3_prefill_with_params;
pub use flow_util::compile_graph_qwen35_decode_with_params;
pub use flow_util::compile_graph_qwen35_prefill_with_params;
pub use flow_util::compile_graph_sam_with_params;
pub use flow_util::compile_graph_with_kv_export_params;
pub use flow_util::graph_from_built;
pub use flow_util::graph_from_hir;
pub use gguf_resolve::ensure_builtin_resolvers;
pub use gguf_support::DEFAULT_GGUF_PREFER_SUBSTR;
pub use lm::FlowBuildExt;
pub use lm::into_compile_parts;
pub use weight_loader::GgufLoader;
pub use weight_loader::HfTranslatingLoader;
pub use weight_loader::WeightLoader;
pub use weight_loader::ggml_type_to_quant_scheme;
pub use weight_loader::gguf_to_hf_name;
pub use weight_loader::gguf_to_hf_name_for_arch;
pub use weight_loader::hf_to_gguf_name;
pub use weight_loader::is_mtp_weight;
pub use weight_loader::load_from_path;
pub use weight_map::WeightDrainPolicy;
pub use weight_map::WeightMap;
pub use weight_registry::LoadWeightsOptions;
pub use weight_registry::LoadedWeights;
pub use weight_registry::RegisteredFormat;
pub use weight_registry::WeightFormatRegistration;
pub use weight_registry::format_for_extension;
pub use weight_registry::list_registered_formats;
pub use weight_registry::load_weight_map_resolved;
pub use weight_registry::load_weights_resolved;
pub use weight_registry::open_weight_loader;
pub use weight_registry::register_weight_format;
pub use weight_registry::registered_extensions_hint;
pub use weights::GgufDirGuide;
pub use weights::LoadOpts;
pub use weights::ResolveOpts;
pub use weights::default_resolve_opts;
pub use weights::gguf_dir_guide;
pub use weights::init;
pub use weights::load_weight_map;
pub use weights::open;
pub use weights::open_map;
pub use weights::open_map_with;
pub use weights::open_with;
pub use weights::pick;
pub use weights::pick_default;

Modules

arch_registry
Architecture registry (plan #82).
autoregressive
Shared helpers for autoregressive decode loops (KV cache + bucketed compile cache).
config
Model configuration structs — parsed from HuggingFace `config.json`.
dataprocessing
Reusable batch-prep utilities (plan #83).
device_capabilities
Shared backend policy for RLX model crates.
flow_bridge
Bridge between rlx-models loaders/runtime and rlx-flow.
flow_util
Shared helpers for tier-0 model flow migration.
gguf_config
Read HuggingFace-shaped config fields from GGUF metadata (`{arch}.*` keys).
gguf_resolve
Pluggable GGUF tensor-name resolution per `general.architecture`.
gguf_support
Shared GGUF helpers for LM runners (architecture checks, path resolution).
lm
Shared causal-LM flow helpers — re-export tier-0 surface for model authors.
moe_weights
GGUF MoE expert-stack loader.
vision_ops_ir
Shared HIR builders for NCHW vision ops (`Conv`, `ConvTranspose2d`, `LayerNorm2d`, bias broadcast). Used by SAM / SAM2 / SAM3.
weight_loader
Pluggable weight loader trait (plan #56).
weight_map
Safetensors weight loading — standalone, no framework dependency.
weight_registry
Extensible weight-format registry — register custom loaders for new extensions.
weights
Model-agnostic weight I/O — paths, formats, drain policy only.

Structs

KvCacheState
Layer-wise past K/V tensors in row-major `[past_len * kv_dim]` layout per layer.