1pub mod aot_cache;
56pub mod attn_mask;
57pub mod backend;
58pub mod backends_manifest;
59pub mod compile_cache;
60pub mod compile_config;
61pub mod compiled;
62pub mod cost;
63mod cpu_low_precision;
64pub mod device_bench;
65pub mod device_ext;
66pub mod device_parse;
67pub mod device_policy;
68pub mod expert_pool;
69pub mod graph_io;
70pub mod jacfwd;
71pub mod kernel_trace;
72pub mod kv_cache;
73pub mod lora_scheduler;
74pub mod memory_estimate;
75pub mod model_pipeline;
76pub mod moe_expert_store;
77#[cfg(feature = "cpu")]
78pub mod onnx_active;
79pub mod op_registry;
80pub mod options;
81pub mod paged_kv;
82pub mod precision;
83pub mod precompile;
84pub mod quantized_kv;
85pub mod record_replay;
86pub mod reflect;
87pub mod registry;
88pub mod router;
89pub mod samplers;
90pub mod session;
91pub mod stages;
92pub mod subgraph;
93pub mod trace;
94pub mod weight_registry;
95pub mod weights;
96pub mod worker_pool;
97pub use rlx_ir::perfetto;
102pub mod custom_ops;
103pub mod device_router;
104pub mod flexible_session;
105pub mod graph_devices;
106pub mod hwinfo;
107pub mod lm;
108pub mod logit_verify;
109pub mod nan_check;
110pub mod phase;
111pub mod spec_decode;
112pub mod telemetry;
113pub mod validators;
114
115pub mod mock_requests;
118
119pub use rlx_driver::{Buffer, BufferHandle, CommandStream, Device, DeviceArena, SyncStream};
121pub use rlx_driver::{
123 CollectiveError, LocalTransport, Rank, SymmetricBuffer, SymmetricHeap, SymmetricTransport,
124};
125pub use aot_cache::{AotCache, AotCacheError};
127pub use backend::{Backend, ExecutableGraph, compile_hir, compile_module};
128pub use backends_manifest::BackendsManifest;
129pub use compile_cache::{
130 BucketedCompileCache, CacheRunInput, CompileCache, DynamicDimCompileCache, pad_rows, slice_rows,
131};
132pub use compile_config::{
133 COMPILE_OUTPUT_CAP_ENV, COMPILE_OUTPUT_CAP_ENV_MLX, DEFAULT_COMPILE_OUTPUT_CAP,
134 compile_output_cap, device_has_compile_output_cap, reset_compile_output_cap,
135 set_compile_output_cap,
136};
137pub use compiled::CompiledGraph;
138pub use cost::fastest_device_for;
139pub use device_bench::{DeviceBenchResult, benchmark_devices, warm_all};
140#[cfg(feature = "apple")]
141pub use device_ext::available_apple_devices;
142pub use device_ext::{
143 available_devices, devices_for, dispatch_report_for_device,
144 dispatch_report_for_device_with_options, fastest_device, first_unsupported_op,
145 first_unsupported_op_with_options, full_name, is_available, legalize_graph_for_device,
146 legalize_graph_for_device_with_options, legalize_graph_for_device_with_report, supports,
147 supports_graph, supports_graph_with_options, supports_run_slots,
148};
149pub use device_parse::{ParseDeviceError, device_label, parse_device, parse_device_list};
150pub use device_policy::{
151 DeviceCandidate, DeviceFallbackError, DevicePickStrategy, DevicePolicy, device_chain_from_env,
152 device_chain_from_env_key, device_from_env, device_from_env_key, device_report,
153 devices_for_with_policy, resolve_device, resolve_device_chain, run_with_fallback,
154};
155pub use device_router::DeviceRouter;
156pub use expert_pool::{
157 ExpertPool, ExpertPoolConfig, ExpertPoolStats, ExpertRefreshPolicy, ExpertRefreshResult,
158 MoEExecMode, gpu_expert_budget_from_vram,
159};
160pub use flexible_session::FlexibleSession;
161pub use graph_devices::{GraphDevices, graph_param_names};
162pub use kv_cache::LayerKvCache;
163pub use lm::{
164 ConfigSource, LmRunner, LmRunnerBuilder, MirostatMode, ModelRegistration,
165 PACKED_GGUF_AUTO_THRESHOLD_BYTES, SampleOpts, WeightFormat, auto_runner_name,
166 registered_models,
167};
168pub use memory_estimate::{
169 DEFAULT_SOFT_MEMORY_FRACTION, MoeOffloadEstimate, available_unified_memory,
170 estimate_moe_offload, llama_decode_bucket_compile_peak_bytes,
171 llama_decode_oneshot_compile_peak_bytes, memory_headroom_bytes, process_rss_bytes,
172 soft_memory_budget_bytes, soft_memory_fraction, would_exceed_soft_budget,
173};
174pub use model_pipeline::ModelCompilePipeline;
175pub use options::CompileOptions;
176pub use precision::Precision;
177pub use reflect::{ModelReflection, load_hir_template_with_extensions, specialize_entry};
178pub use registry::{BackendFactory, backend_for, register_backend, registered_devices};
179
180pub const MLX_COMPILE_OUTPUT_CAP_ENV: &str = COMPILE_OUTPUT_CAP_ENV;
182
183pub const DEFAULT_MLX_COMPILE_OUTPUT_CAP: usize = DEFAULT_COMPILE_OUTPUT_CAP;
185
186#[inline]
188pub fn mlx_compile_output_cap() -> usize {
189 compile_output_cap()
190}
191
192#[inline]
194pub fn set_mlx_compile_output_cap(cap: usize) {
195 set_compile_output_cap(cap);
196}
197
198#[inline]
200pub fn reset_mlx_compile_output_cap() {
201 reset_compile_output_cap();
202}
203
204#[cfg(feature = "cpu")]
205pub use rlx_cpu::moe_residency::MoeResidencyStats;
206#[cfg(feature = "cpu")]
207pub use rlx_cpu::moe_topk_capture::MoeTopkCapture;
208pub use rlx_driver::{ReduceKind, all_gather, all_reduce, reduce_scatter};
209pub use rlx_ir::env::{self, RlxEnv, RuntimeOverrides};
210pub use session::Session;
211pub use stages::{
212 compile_graph_stages, compile_graph_stages_for_backend, compile_hir_stages,
213 compile_module_stages, fusion_target_for, graph_from_lir, maybe_log_fusion,
214 options_with_supported_ops, pipeline_for,
215};
216pub use subgraph::{SubgraphCache, run_if, run_while};
217
218pub use expert_pool::{merged_resident_mask, per_layer_resident_masks};
219pub use moe_expert_store::{ExpertStackF32, LayerMoeWeights, MoeExpertStore};
220pub use weight_registry::{WeightEntry, WeightHandle, WeightKind, WeightRegistry};
221pub use weights::{BytesWeightLoader, WeightLoader};
222
223pub use rlx_ir::{AsyncCopy, BarrierToken, DoubleBuffer, SyncCopy};
227pub use rlx_ir::{CacheBuster, Tick, time_ns};
228
229pub use rlx_ir::{
231 inspect_graph, inspect_hir, inspect_hir_stats, inspect_lir, inspect_mir, inspect_mir_stats,
232};
233pub use rlx_opt::{OpKind, PrecisionPolicy};
234pub use rlx_opt::{PipelineInspect, inspect_pipeline};
235
236pub use rlx_ir::logical_kernel::{KernelDispatchConfig, KernelDispatchPolicy};
238pub use rlx_ir::op;
239pub use rlx_ir::{
240 BindingManifest, CompilationMode, DType, Graph, HirExtensionFn, HirReflection, IoBindingEntry,
241 ManifestDiff, ModelComponent, ModelPhase, ModelVariant, Node, NodeId, Op, RngBackend,
242 RngOptions, Shape, WeightBlock, apply_hir_extensions, register_hir_extension,
243 registered_hir_extensions,
244};
245
246pub use rlx_macros::pipeline_schedule;
248pub use rlx_macros::rlx_model;