# aprender-gpu 0.29.0

# Pure Rust PTX generation for NVIDIA CUDA - no LLVM, no nvcc
# Documentation
# Package metadata. The version is inherited from the workspace root; all
# other fields are set locally. Keys follow Cargo convention: name, version,
# the rest alphabetically, description last.
[package]
name = "aprender-gpu"
version.workspace = true
authors = ["Pragmatic AI Labs"]
categories = ["algorithms", "mathematics", "science"]
edition = "2021"
keywords = ["cuda", "ptx", "gpu", "simd", "nvidia"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/paiml/trueno"
description = "Pure Rust PTX generation for NVIDIA CUDA - no LLVM, no nvcc"

[dependencies]
# Only thiserror and batuta-common are unconditional; every other entry in
# this table is optional and is switched on by a feature in [features].
#
# NOTE: this crate (package `aprender-gpu`, lib name `trueno_gpu`) does NOT
# depend on trueno (that would create a dependency cycle).
# ComputeBrick is available from the trueno crate directly.
# Users: `use trueno::brick::{ComputeBrick, ComputeBackend};`
thiserror = "2.0"
batuta-common = "0.1"
# Dynamic library loading for libcuda.so/nvcuda.dll
# OWN THE STACK: We write our own CUDA FFI in driver/sys.rs (~400 lines)
# Citation: RustBelt [1] proves Rust's type system safely encapsulates unsafe FFI
libloading = { version = "0.8", optional = true }
# WASM support
wasm-bindgen = { version = "0.2", optional = true }
# Sovereign Stack - visual testing (optional to avoid version conflicts)
trueno-viz = { version = "0.1.4", optional = true }
# Simular for deterministic RNG and TUI monitoring (Sovereign Stack)
simular = { version = "0.2.0", optional = true }
# Renacer for profiling and anomaly detection (Sovereign Stack)
renacer = { version = "0.10", optional = true }
# TUI support via presentar (optional; pulled in by the `tui-monitor` and
# `gpu-pixels` features)
presentar-core = { version = "0.3", optional = true }
presentar-terminal = { version = "0.3", optional = true }
crossterm = { version = "0.28", optional = true }
# WGPU for cross-platform WebGPU compute (Vulkan/Metal/DX12/WebGPU)
wgpu = { version = "24", optional = true }

# Apple Metal backend via manzana (macOS only).
# Optional: enabled by the `metal` feature.
[target.'cfg(target_os = "macos")'.dependencies]
manzana = { version = "0.2.0", optional = true }

# Dependencies for tests, examples and benches on every platform
# (kept in alphabetical order).
[dev-dependencies]
bytemuck = { version = "1.21", features = ["derive"] }
criterion = "0.7"
pollster = "0.4"
proptest = "1.9"
simular = "0.2.0"

# renacer is Linux-only (uses ptrace syscalls), so as a dev-dependency it is
# only declared for Linux targets.
[target.'cfg(target_os = "linux")'.dev-dependencies]
renacer = "0.10"

# Dependencies for every target except wasm32.
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
# GPU pixel testing with TUI visualization (Sovereign Stack)
# Optional: enabled by the `gpu-pixels` feature.
jugar-probar = { version = "0.4.0", optional = true }

[features]
# Nothing is enabled by default. Optional dependencies are referenced with the
# `dep:` prefix, so Cargo does not create implicit features named after them;
# a dependency is only usable through the feature names listed here.
default = []
# Enable CUDA driver FFI for actual GPU execution (requires NVIDIA driver)
# Uses our own driver/sys.rs FFI layer, not external bindings
cuda = ["dep:libloading"]
# WASM visual testing
wasm = ["dep:wasm-bindgen"]
# Visual testing with trueno-viz (enables gpu_renderer)
viz = ["dep:trueno-viz"]
# Stress testing with randomized inputs (native only)
# NOTE(review): renacer is described as Linux-only (ptrace) in the Linux
# dev-dependencies table, but the optional dependency enabled here is not
# target-gated - confirm this feature builds on non-Linux hosts.
stress-test = ["dep:simular", "dep:renacer"]
# TUI monitoring mode for stress tests (also turns on `stress-test`)
tui-monitor = ["stress-test", "dep:presentar-core", "dep:presentar-terminal", "dep:crossterm"]
# GPU pixel testing with probar TUI visualization
# (jugar-probar is only declared for non-wasm32 targets)
gpu-pixels = ["dep:jugar-probar", "dep:presentar-core", "dep:presentar-terminal", "dep:crossterm"]
# WGPU backend for cross-platform GPU compute (WebGPU via wgpu crate)
# The feature shares its name with the dependency it enables.
wgpu = ["dep:wgpu"]
# Apple Metal backend via manzana (macOS only)
metal = ["dep:manzana"]

# wasm-pack configuration for release builds: turn off the wasm-opt
# post-processing step.
[package.metadata.wasm-pack.profile.release]
wasm-opt = false

# Crate-local lint configuration. This table sets lints directly; it does NOT
# inherit the workspace lints (that would be `[lints]` with `workspace = true`).
# unsafe code is allowed here - presumably because of the hand-written CUDA
# driver FFI mentioned under [dependencies]; confirm before tightening.
[lints.rust]
unsafe_code = "allow"


# Library target. The library name is `trueno_gpu`, which differs from the
# package name `aprender-gpu`, so Rust code refers to the crate as `trueno_gpu`.
# Built both as an rlib (for Rust dependents) and as a cdylib
# (presumably for the WASM / wasm-pack build - confirm).
[lib]
name = "trueno_gpu"
path = "src/lib.rs"
crate-type = ["cdylib", "rlib"]

# Benchmark target `ptx_gen`. `harness = false` disables the default libtest
# bench harness so the benchmark provides its own entry point
# (presumably Criterion, which is listed in [dev-dependencies]).
[[bench]]
name = "ptx_gen"
harness = false

# cuBLAS benchmark: cargo test -p aprender-gpu --release --features cuda --lib -- cublas_bench --no-capture