Expand description
RLX Tensor IR — the intermediate representation for the RLX ML compiler.
This IR is:
- Standalone: no runtime, no backend, no framework coupling
- Serializable: graphs can be saved/loaded for AOT compilation
- Optimizable: designed for pattern-matching fusion and buffer planning
The compiler pipeline has three named levels:
- HIR (`hir`) — block-oriented IR for model builders (`Linear`, `SwiGLU`, `ResidualRmsNorm`, …).
- MIR (`mir`) — fused tensor DAG; input to [rlx_opt].
- LIR (`lir`) — optimized MIR + arena buffer plan for backends.
Graph is the primary DX surface. Use Graph::define for
fusion-first HIR builders, or Graph::new / GraphModule::mir
for primitive MIR. GraphModule tracks pipeline stage (HIR/MIR/LIR).
Re-exports§
pub use nvfp4::FP4_E2M1_LUT;pub use nvfp4::NVFP4_GROUP_SIZE;pub use nvfp4::fp4_e2m1_to_f32;pub use nvfp4::fp8_e4m3_scale_to_f32;pub use ad::AdPipelineStage;pub use async_copy::AsyncCopy;pub use async_copy::BarrierToken;pub use async_copy::DoubleBuffer;pub use async_copy::SyncCopy;pub use attention_layout::ATTENTION_FLASH_MAX_HEAD_DIM;pub use attention_layout::AttentionGeom;pub use attention_layout::AttentionLaunchStrides;pub use attention_layout::attention_dispatch_use_row;pub use attention_layout::attention_geom;pub use attention_layout::attention_launch_strides;pub use attention_layout::cpu_attention_bshd;pub use attention_layout::cpu_attention_packed_bshd_qkv;pub use attention_layout::detect_packed_bshd_qkv_attention;pub use attention_layout::mask_strides_bhsd;pub use attention_layout::mask_strides_for_shape;pub use attention_layout::packed_bshd_narrow_elidable;pub use attention_layout::packed_bshd_qkv_strides;pub use attention_layout::strides_bhsd;pub use attention_layout::strides_bshd;pub use attention_layout::strides_for_shape;pub use dtype::DType;pub use dtype::Element;pub use dtype::ElementSubtype;pub use dtype::scalar_constant_bytes;pub use dynamic::sym;pub use dynamic::DimEnv;pub use dynamic::bind_graph;pub use dynamic::collect_dynamic_symbols;pub use dynamic::has_dynamic_dims;pub use dynamic::infer_bindings_from_f32_inputs;pub use dynamic::infer_bindings_from_inputs;pub use dynamic::same_binding;pub use dynamic::sync_concat_shapes;pub use dynamic::sync_expand_ops;pub use dynamic::sync_graph_shapes;pub use dynamic::sync_narrow_ops;pub use dynamic::sync_reshape_ops;pub use env::RlxEnv;pub use env::RuntimeOverrides;pub use env::flag;pub use env::is_unset;pub use env::parse_or;pub use env::set;pub use env::unset;pub use env::var;pub use env::var_os;pub use fft::FftGpuPlan;pub use fft::FftMeta;pub use fft::FftNorm;pub use fft::fft_meta;pub use fft::fftn_axes_all;pub use fft::normalize_fftn_axes;pub use graph::Graph;pub use graph::Node;pub use 
graph::NodeId;pub use hir::FusionPolicy;pub use hir::HirGraphExt;pub use hir::HirModule;pub use hir::HirMut;pub use hir::HirNode;pub use hir::HirNodeId;pub use hir::HirOp;pub use infer::GraphExt;pub use inspect::inspect_buffer_plan;pub use inspect::inspect_graph;pub use inspect::inspect_graph_diff;pub use inspect::inspect_hir;pub use inspect::inspect_hir_stats;pub use inspect::inspect_lir;pub use inspect::inspect_mir;pub use inspect::inspect_mir_diff;pub use inspect::inspect_mir_stats;pub use layout::Coord2;pub use layout::Ragged;pub use layout::ShapeTuple;pub use layout::Strides2;pub use layout::Strides3;pub use layout::Tile2;pub use layout::Tile3;pub use lir::LirBufferPlan;pub use lir::LirBufferSlot;pub use lir::LirFingerprint;pub use lir::LirIoManifest;pub use lir::LirModule;pub use lir::LirViewAlias;pub use logical_kernel::KernelDispatchConfig;pub use logical_kernel::KernelDispatchPolicy;pub use logical_kernel::LogicalKernelEntry;pub use logical_kernel::logical_kinds_in_graph;pub use logical_kernel::registered_logical_kernels;pub use logical_kernel::should_lower_to_common;pub use measure::CacheBuster;pub use measure::Tick;pub use measure::time_ns;pub use mir::MirModule;pub use mir::MirNode;pub use mir::MirNodeId;pub use mir::MirOp;pub use module::GraphModule;pub use module::GraphStage;pub use op::ChainOperand;pub use op::ChainStep;pub use op::Op;pub use op::OpKind;pub use op::RegionPrologue;pub use op::TransformStep;pub use op_registry::JvpContext;pub use op_registry::OpExtension;pub use op_registry::OpRegistry;pub use op_registry::VjpContext;pub use op_registry::VmapContext;pub use op_registry::global_registry;pub use op_registry::lookup_op;pub use op_registry::register_op;pub use ops::attention::attention_kind_op;pub use phase::Phase;pub use phase::PhaseSchedule;pub use phase::derive_phases;pub use provenance::NodeOrigin;pub use provenance::node_label;pub use provenance::stamp_pass_origins;pub use quant::QuantMap;pub use quant::QuantScheme;pub use 
region_encode::FK_BATCH_SINGLE_KERNEL_MAX;pub use region_encode::PrologueLaunchGrid;pub use region_encode::REGION_META_WORDS;pub use region_encode::REGION_PROLOGUE_NONE;pub use region_encode::REGION_PROLOGUE_RESIZE_NEAREST_2X_NCHW;pub use region_encode::RegionNchwDims;pub use region_encode::batch_region_slice_dst_off_f32;pub use region_encode::batch_region_slice_elems;pub use region_encode::batch_region_slice_shape;pub use region_encode::encode_chain_operand;pub use region_encode::encode_chain_steps;pub use region_encode::encode_elementwise_region_meta;pub use region_encode::encode_prologue_tail;pub use region_encode::fk_batch_single_kernel_enabled;pub use region_encode::fk_batch_use_single_launch;pub use rng::Philox4x32;pub use verify::VerifyError;pub use verify::verify;pub use verify::verify_all;pub use verify::verify_shapes;pub use binding_manifest::BindingManifest;pub use binding_manifest::IoBindingEntry;pub use binding_manifest::WeightBlock;pub use component::CompilationMode;pub use component::ModelComponent;pub use hir_extension::HirExtensionFn;pub use hir_extension::apply_hir_extensions;pub use hir_extension::apply_hir_extensions_named;pub use hir_extension::register_hir_extension;pub use hir_extension::registered_hir_extensions;pub use reflect::BlockSpecialization;pub use reflect::HirReflection;pub use reflect::ManifestDiff;pub use reflect::MirReflection;pub use reflect::SpecializeBlockRecord;pub use reflect::layout_for_binding;pub use reflect::layout_from_lir;pub use reflect::probe_block_specialization;pub use reflect::symbolic_layout_hint;pub use rf::complex_div;pub use rf::const_f32;pub use rf::cs_degen_z_in;pub use rf::find_param_node;pub use rf::find_param_nodes;pub use rf::mag2;pub use rf::s11_from_z;pub use rf::scalar_f32;pub use shape::Dim;pub use shape::DimBinding;pub use shape::Shape;pub use variant::ModelPhase;pub use variant::ModelVariant;
Modules§
- ad
- Autodiff staging in the HIR → MIR → LIR pipeline.
- async_copy
- Async tile-copy + double-buffer primitives (plan #22).
- attention_layout
- Shared attention Q/K/V layout detection for backends.
- audio
- Audio frontend helpers for `Op::LogMel` / `Op::LogMelBackward`.
- binding_manifest
- Reflected binding layout from a specialized `LirModule`.
- component
- Unified model component — one object drives specialization, compile cache, and binding.
- const_check
- Compile-time shape / rank assertions (plan #77).
- dtype
- Element data types for tensors.
- dynamic
- Dynamic / symbolic dimensions — compile once, specialize at runtime.
- env
- Unified `RLX_*` configuration — readable from code overrides or process env.
- fft
- Shared metadata for `Op::Fft` lowering and host-fallback dispatch.
- graph
- The computation graph — a DAG of typed tensor operations.
- hir
- HIR — high-level IR.
- hir_extension
- Retroactive HIR extensions (Slang `extension` declarations).
- infer
- Shape-inferred graph builder — ergonomic API that auto-computes output shapes.
- infer_shape
- Re-derive output shapes from inputs — used by the verifier to catch builder / pass bugs that assign the wrong `Node::shape`.
- inspect
- Text exporters for inspecting HIR / MIR / LIR during lowering.
- layout
- Shared layout vocabulary (plan #3).
- lir
- LIR — low-level IR.
- logical_kernel
- One logical kernel, many backends — dispatch policy and registry.
- measure
- Cycle-accurate timing primitive (#66 in plan.md).
- mir
- MIR — mid-level IR.
- module
- `GraphModule` — unified higher-order DX over HIR / MIR / LIR.
- nvfp4
- NVIDIA FP4 (E2M1) block layout shared by FLUX / MLX `nvfp4` mode.
- op
- Operation types — every tensor op in the RLX IR.
- op_registry
- Op registry — pluggable, trait-based extension point for custom ops.
- ops
- Per-op graph builders (plan #53).
- perfetto
- PLAN L3: Perfetto / chrome-trace JSON output for cross-backend timeline capture.
- phase
- Streaming inference phases attached to LIR nodes (plan #16 / #28).
- pretty
- Annotated graph dump for debugging.
- provenance
- Cross-stage node provenance — HIR block → MIR node → fusion pass.
- quant
- Quantization metadata as graph annotations (plan #57). The scheme lives as per-tensor metadata on the IR rather than spawning a parallel “quantized graph” type. Ops can read the scheme and dispatch to fused-dequant kernels (the eventual #5 win) when present, or fall through to the standard f32/f16 path when not.
- reflect
- Reflection over HIR/MIR/LIR — layout and structure without executing.
- region_encode
- Shared `Op::ElementwiseRegion` metadata encoding for GPU region kernels.
- rf
- RF / complex-scalar graph builders — shared by EDA inverse-design crates.
- rng
- Counter-based deterministic RNG (plan #43).
- shape
- Tensor shapes with static and dynamic dimensions.
- target
- Compile-time target predicates (plan #78).
- variant
- Model execution variants — one object drives cache keys and `DimBinding`.
- verify
- Graph verification — catches IR bugs early.
Macros§
- debug_assert_valid
- Panic when verification fails. Debug builds only — in release this macro expands to nothing and is not compiled.
- static_assert
- Compile-time assert. Wraps the const-evaluation idiom in a terse macro so call sites read like `static_assert!(cond)`.
- trace_span
- Open a Perfetto trace span. The returned `Option<TraceSpan>` is `None` when tracing is disabled — bind it to `_span` so the scope drop point is the natural end-of-block.
Functions§
- hir_to_graph
- Lower a HIR module to MIR, then extract the legacy `Graph` API surface.