Expand description
Shared infrastructure for RLX model crates: HuggingFace config parsing,
safetensors / GGUF weight loading, tier-1 compile profile helpers, and
packed GGUF prefill guards (flow_bridge::packed_gguf_compile_guard, etc.).
Re-exports§
pub use device_capabilities::STANDARD_DEVICE_NAMES;pub use device_capabilities::STANDARD_DEVICES;pub use device_capabilities::device_memory_for_moe_offload;pub use device_capabilities::is_standard_device;pub use device_capabilities::validate_sam_device;pub use device_capabilities::validate_standard_device;pub use gguf_config::DINOV2_GGUF_ARCHES;pub use gguf_config::EMBED_GGUF_ARCHES;pub use gguf_config::EmbedGgufKind;pub use gguf_config::FLUX_GGUF_ARCHES;pub use gguf_config::GgufMemoryFootprint;pub use gguf_config::SAM_GGUF_ARCHES;pub use gguf_config::SAM2_GGUF_ARCHES;pub use gguf_config::SAM3_GGUF_ARCHES;pub use gguf_config::VJEPA2_GGUF_ARCHES;pub use gguf_config::W2V_BERT_GGUF_ARCHES;pub use gguf_config::embed_gguf_kind;pub use gguf_config::gguf_memory_footprint;pub use gguf_config::gguf_meta_u32;pub use gguf_config::gguf_runner_hint;pub use gguf_config::is_dinov2_gguf_arch;pub use gguf_config::is_embed_gguf_arch;pub use gguf_config::is_flux_gguf_arch;pub use gguf_config::is_sam_gguf_arch;pub use gguf_config::is_sam2_gguf_arch;pub use gguf_config::is_sam3_gguf_arch;pub use gguf_config::is_vjepa2_gguf_arch;pub use gguf_config::is_w2v_bert_gguf_arch;pub use gguf_resolve::GgufTensorNameResolver;pub use gguf_resolve::LlamaFamilyGgufResolver;pub use gguf_resolve::PassThroughGgufResolver;pub use gguf_resolve::PrefixStripGgufResolver;pub use gguf_resolve::Qwen35NativeGgufResolver;pub use gguf_resolve::register_gguf_tensor_resolver;pub use gguf_resolve::resolve_gguf_tensor_name;pub use gguf_support::GgufModelFamily;pub use gguf_support::ResolveWeightsOptions;pub use gguf_support::assert_gguf_family;pub use gguf_support::gguf_architecture_from_path;pub use gguf_support::gguf_architecture_str;pub use gguf_support::gguf_f32_bytes_estimate;pub use gguf_support::gguf_family_for_arch;pub use gguf_support::gguf_safetensors_only_hint;pub use gguf_support::gguf_split_hint;pub use gguf_support::gguf_split_siblings;pub use gguf_support::gguf_validate_arch;pub use 
gguf_support::list_gguf_files_in_dir;pub use gguf_support::load_gguf_file;pub use gguf_support::resolve_weights_file;pub use gguf_support::resolve_weights_file_with_options;pub use autoregressive::compile_cache_ensure_graph;pub use autoregressive::kv_from_prefill_outputs;pub use autoregressive::past_kv_input_names;pub use autoregressive::prefill_cache_key;pub use autoregressive::run_bucketed_kv_decode;pub use autoregressive::run_bucketed_kv_decode_hir;pub use autoregressive::split_bucketed_decode_kv;pub use autoregressive::split_decode_logits_kv;pub use config::BertConfig;pub use config::NomicBertConfig;pub use config::NomicVisionConfig;pub use flow_bridge::apply_compile_profile;pub use flow_bridge::compile_graph_encoder;pub use flow_bridge::compile_graph_gemma_decode;pub use flow_bridge::compile_graph_gemma_prefill;pub use flow_bridge::compile_graph_legacy;pub use flow_bridge::compile_graph_llama32_decode;pub use flow_bridge::compile_graph_llama32_prefill;pub use flow_bridge::compile_graph_qwen3_decode;pub use flow_bridge::compile_graph_qwen3_prefill;pub use flow_bridge::compile_graph_qwen35_decode;pub use flow_bridge::compile_graph_qwen35_prefill;pub use flow_bridge::compile_graph_sam;pub use flow_bridge::compile_graph_with_profile;pub use flow_bridge::compile_options_for_packed_gguf_prefill;pub use flow_bridge::compile_options_for_packed_gguf_prefill_with_profile;pub use flow_bridge::compile_options_for_profile;pub use flow_bridge::load_compile_profile;pub use flow_bridge::packed_gguf_compile_guard;pub use flow_bridge::packed_gguf_execution_device;pub use flow_bridge::profile_near_weights;pub use flow_util::WeightMapSource;pub use flow_util::bucket_cache_ensure_built;pub use flow_util::build_graph;pub use flow_util::built_from_graph;pub use flow_util::built_from_hir;pub use flow_util::built_from_hir_with_profile;pub use flow_util::compile_built;pub use flow_util::compile_built_cpu;pub use flow_util::compile_cache_ensure_built;pub use 
flow_util::compile_graph_encoder_with_params;pub use flow_util::compile_graph_gemma_decode_with_params;pub use flow_util::compile_graph_gemma_prefill_with_params;pub use flow_util::compile_graph_profile;pub use flow_util::compile_graph_qwen3_prefill_with_params;pub use flow_util::compile_graph_qwen35_decode_with_params;pub use flow_util::compile_graph_qwen35_prefill_with_params;pub use flow_util::compile_graph_sam_with_params;pub use flow_util::compile_graph_with_kv_export_params;pub use flow_util::graph_from_built;pub use flow_util::graph_from_hir;pub use gguf_resolve::ensure_builtin_resolvers;pub use gguf_support::DEFAULT_GGUF_PREFER_SUBSTR;pub use lm::FlowBuildExt;pub use lm::into_compile_parts;pub use weight_loader::GgufLoader;pub use weight_loader::HfTranslatingLoader;pub use weight_loader::WeightLoader;pub use weight_loader::ggml_type_to_quant_scheme;pub use weight_loader::gguf_to_hf_name;pub use weight_loader::gguf_to_hf_name_for_arch;pub use weight_loader::hf_to_gguf_name;pub use weight_loader::is_mtp_weight;pub use weight_loader::load_from_path;pub use weight_map::WeightDrainPolicy;pub use weight_map::WeightMap;pub use weight_registry::LoadWeightsOptions;pub use weight_registry::LoadedWeights;pub use weight_registry::RegisteredFormat;pub use weight_registry::WeightFormatRegistration;pub use weight_registry::format_for_extension;pub use weight_registry::list_registered_formats;pub use weight_registry::load_weight_map_resolved;pub use weight_registry::load_weights_resolved;pub use weight_registry::open_weight_loader;pub use weight_registry::register_weight_format;pub use weight_registry::registered_extensions_hint;pub use weights::GgufDirGuide;pub use weights::LoadOpts;pub use weights::ResolveOpts;pub use weights::default_resolve_opts;pub use weights::gguf_dir_guide;pub use weights::init;pub use weights::load_weight_map;pub use weights::open;pub use weights::open_map;pub use weights::open_map_with;pub use weights::open_with;pub use weights::pick;pub use 
weights::pick_default;
Modules§
- arch_registry
- Architecture registry (plan #82).
- autoregressive
- Shared helpers for autoregressive decode loops (KV cache + bucketed compile cache).
- config
- Model configuration structs — parsed from HuggingFace config.json.
- dataprocessing
- Reusable batch-prep utilities (plan #83).
- device_capabilities
- Shared backend policy for RLX model crates.
- flow_bridge
- Bridge between `rlx-models` loaders/runtime and `rlx-flow`.
- flow_util
- Shared helpers for tier-0 model flow migration.
- gguf_config
- Read HuggingFace-shaped config fields from GGUF metadata (`{arch}.*` keys).
- gguf_resolve
- Pluggable GGUF tensor-name resolution per `general.architecture`.
- gguf_support
- Shared GGUF helpers for LM runners (architecture checks, path resolution).
- lm
- Shared causal-LM flow helpers — re-export tier-0 surface for model authors.
- moe_weights
- GGUF MoE expert-stack loader.
- vision_ops_ir
- Shared HIR builders for NCHW vision ops (`Conv`, `ConvTranspose2d`, `LayerNorm2d`, bias broadcast). Used by SAM / SAM2 / SAM3.
- weight_loader
- Pluggable weight loader trait (plan #56).
- weight_map
- Safetensors weight loading — standalone, no framework dependency.
- weight_registry
- Extensible weight-format registry — register custom loaders for new extensions.
- weights
- Model-agnostic weight I/O — paths, formats, drain policy only.
Structs§
- KvCacheState
- Layer-wise past K/V tensors in row-major `[past_len * kv_dim]` layout per layer.