# rlx-runtime 0.2.2

# Crate identity first, then the remaining keys alphabetically, with the
# description last. Keys written as `<key>.workspace = true` are inherited
# from the workspace manifest.
[package]
name = "rlx-runtime"
version = "0.2.2"
authors.workspace = true
categories.workspace = true
documentation.workspace = true
edition.workspace = true
homepage.workspace = true
keywords.workspace = true
license.workspace = true
readme = "README.md"
repository.workspace = true
rust-version.workspace = true
description = "RLX runtime — feature-gated backends, session API, compile+execute pipeline"

[features]
default = ["cpu"]

# CPU backends

# Base CPU backend: SIMD kernels (NEON/AVX) plus a persistent thread pool.
cpu = ["dep:rlx-cpu", "rlx-cpu/splat"]

# Kernel tracing, gated at compile time (plan #7). Disabled by default so
# production builds pay zero overhead; enable it to debug op dispatch,
# shapes, and per-kernel timing.
kernel-trace = []

# NaN/inf check epilogues (plan #18). When enabled, every
# CompiledGraph::run output is scanned for NaN/inf and the offending
# output node is reported. Disabled by default because the scan is
# O(output_size).
nan-check = []

# Mock request fixtures (plan #64). Disabled by default: compiled in only
# when explicitly requested or during tests. Enable it if a downstream
# serving crate wants the canned OpenAI-shaped payloads.
mock-requests = []

# BLAS acceleration: pick ONE per platform.
# macOS: Apple Accelerate (AMX, fastest on Apple Silicon).
blas-accelerate = ["cpu"]
# Intel/AMD: Intel MKL.
blas-mkl = ["cpu"]
# Cross-platform: OpenBLAS.
blas-openblas = ["cpu"]

# Future direct hardware access. Declared (empty) so that device_ext.rs's
# `cfg!(feature = "...")` checks compile cleanly; switch to real deps once
# the corresponding backend crates land.
# Apple Neural Engine (via CoreML/ANE framework).
ane = []
# Apple AMX coprocessor (via Accelerate internally) — not declared yet:
# amx = ["cpu"]
# Microsoft MLAS (from ONNX Runtime) — not declared yet:
# mlas = ["cpu"]

# GPU backends

# Portable GPU via wgpu (Metal on macOS, Vulkan on Linux, DX12 on Windows,
# WebGPU in browsers).
gpu = ["dep:rlx-wgpu", "rlx-wgpu/splat"]

# Apple Metal (MPSGraph + custom MSL). Splat: CPU reference + common IR.
metal = ["dep:rlx-metal", "rlx-cpu/splat"]

# Apple MLX (unified memory); splat via CPU reference on host buffers.
mlx = ["dep:rlx-mlx", "rlx-cpu/splat"]

# Umbrella for all Apple RLX backends: Metal (incl. MPS), MLX, and the
# portable wgpu-on-Metal path.
apple = ["metal", "mlx", "gpu"]

# NVIDIA CUDA native; native CUDA tile raster.
cuda = ["dep:rlx-cuda", "rlx-cpu/splat"]

# AMD GPU (ROCm/HIP); splat via CPU reference between segments.
rocm = ["dep:rlx-rocm", "rlx-cpu/splat"]

# Google TPU via libtpu's PJRT plugin.
tpu = ["dep:rlx-tpu", "rlx-cpu/splat"]

# Cross-platform GPU APIs

# These features are declared so that device_ext.rs's
# `cfg!(feature = "...")` checks compile cleanly; switch them to
# `dep:rlx-vulkan` etc. once each backend crate lands. `vulkan` currently
# forwards to `gpu`; the others are empty placeholders.
# Vulkan via wgpu (`Device::Vulkan` — separate adapter from `Device::Gpu`).
vulkan = ["gpu"]
# OpenGL compute shaders (legacy).
opengl = []
# DirectX 12 compute (Windows).
directx = []
# WebGPU (WASM target).
webgpu = []

[dependencies]
# Third-party crates.
half = "2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"

# Workspace crates that are always compiled in.
rlx-driver = { path = "../rlx-driver", version = "0.2.1" }
rlx-ir = { path = "../rlx-ir", version = "0.2.2", features = ["serialize"] }
rlx-macros = { path = "../rlx-macros", version = "0.2.1" }
rlx-opt = { path = "../rlx-opt", version = "0.2.2" }

# Optional backend crates. Each one is pulled in by a `dep:<crate>` entry
# in [features]; several backend features also enable `rlx-cpu/splat`,
# which activates rlx-cpu as well.
rlx-cpu = { path = "../rlx-cpu", version = "0.2.3", optional = true }
rlx-cuda = { path = "../rlx-cuda", version = "0.2.2", optional = true }
rlx-metal = { path = "../rlx-metal", version = "0.2.2", optional = true }
rlx-mlx = { path = "../rlx-mlx", version = "0.2.2", optional = true }
rlx-rocm = { path = "../rlx-rocm", version = "0.2.2", optional = true }
rlx-tpu = { path = "../rlx-tpu", version = "0.2.2", optional = true }
rlx-wgpu = { path = "../rlx-wgpu", version = "0.2.3", optional = true }

# Disabled entry kept for reference.
# NOTE(review): presumably superseded by rlx-wgpu — confirm and delete.
# rlx-gpu = { path = "../rlx-gpu", version = "0.2.1", optional = true }

# Crates needed only by tests, examples, and benches. `half` is not listed
# here: the identical [dependencies] entry already makes it available to
# those targets.
[dev-dependencies]
anyhow = "1"
rlx-autodiff = { path = "../rlx-autodiff", version = "0.2.2" }
rlx-compile = { path = "../rlx-compile", version = "0.2.2", features = ["training"] }
rlx-splat = { path = "../rlx-splat", version = "0.2.1", default-features = false, features = ["test-support", "io"] }
# Downstream demonstration packages used in integration tests.
# Enable both `metal` and `mlx` features so the per-backend tests
# can register rlx-sparse's per-backend kernels. On non-macOS these
# are no-ops (each kernels module is cfg-gated to target_os = "macos").
# NOTE(review): the rlx-sparse entry below enables only `cpu`, not
# `metal`/`mlx` — confirm whether the paragraph above is stale or the
# feature list is incomplete.
rlx-sparse = { path = "../rlx-sparse", version = "0.2.1", default-features = false, features = ["cpu"] }
rlx-linalg = { path = "../rlx-linalg", version = "0.2.1" }
# Used by dequant_matmul_gguf.rs to construct reference GGUF block
# bytes and verify that the in-graph Op::DequantMatMul (GGUF schemes)
# matches a manual dequant+sgemm.
rlx-gguf = { path = "../rlx-gguf", version = "0.2.1" }

# Inherit the lint configuration declared in the workspace root's
# [workspace.lints] table.
[lints]
workspace = true

# Examples that are only built when the `cpu` feature is enabled.
[[example]]
name = "splat_decomposed_session"
required-features = ["cpu"]

[[example]]
name = "splat_render_ply_session"
required-features = ["cpu"]

[[example]]
name = "splat_common_splat_session"
required-features = ["cpu"]

# docs.rs builds in a Linux container without macOS (Metal/MLX) or
# vendor (CUDA/ROCm/TPU) toolchains. Enable the cross-platform
# backends so the rendered docs cover Session / CompiledGraph plus the
# wgpu dispatch path.
[package.metadata.docs.rs]
all-features = false
features = ["cpu", "gpu"]
rustdoc-args = ["--cfg", "docsrs"]