# Package metadata for the `numr` crate: identity first, remaining keys
# alphabetically, and the long description last.
[package]
name = "numr"
version = "0.5.1"
categories = ["science", "mathematics", "data-structures"]
documentation = "https://docs.rs/numr"
edition = "2024"
keywords = ["tensor", "ndarray", "linear-algebra", "gpu", "fft"]
license = "Apache-2.0"
repository = "https://github.com/ml-rust/numr"
rust-version = "1.89"
description = "High-performance numerical computing with multi-backend GPU acceleration (CPU/CUDA/WebGPU)"

# Features requested for the docs.rs documentation build, in addition to the
# defaults.
[package.metadata.docs.rs]
features = ["f16", "sparse"]

# Cargo features. Only `rayon` is on by default; everything else is opt-in.
[features]
default = ["rayon"]
# Pulls in the optional `rayon` dependency.
rayon = ["dep:rayon"]
# GPU backends: each one pulls in its optional backend crate(s).
cuda = ["dep:cudarc"]
wgpu = ["dep:wgpu", "dep:pollster"]
# Implies `cuda` and switches on cudarc's own `nccl` feature.
nccl = ["cuda", "cudarc?/nccl"]
# Distributed support via `nexar` and `tokio`; the GPU variant adds `nccl`
# and `nexar-nccl` on top of it.
distributed = ["dep:nexar", "dep:tokio"]
distributed-gpu = ["distributed", "nccl", "dep:nexar-nccl"]
# Pulls in `half`. The `?` form forwards `f16` to cudarc only when cudarc is
# enabled by some other feature; it does not enable cudarc by itself.
f16 = ["dep:half", "cudarc?/f16"]
# These two enable no dependencies and no other features.
# NOTE(review): presumably they only gate code inside the crate - confirm.
fp8 = []
sparse = []

# Runtime dependencies, sorted alphabetically. Every entry marked
# `optional = true` is pulled in through a `dep:<name>` item in `[features]`.
[dependencies]
bytemuck = { version = "1.24", features = ["derive"] }
cudarc = { version = "0.19", optional = true, features = ["cuda-version-from-build-system"] }
half = { version = "2.7", optional = true, features = ["bytemuck", "num-traits"] }
nexar = { version = "0.1", optional = true }
nexar-nccl = { version = "0.1", optional = true }
num-traits = "0.2"
parking_lot = "0.12"
paste = "1.0"
pollster = { version = "0.4", optional = true }
rayon = { version = "1.11", optional = true }
rkyv = "0.8"
smallvec = "1"
thiserror = "2.0"
tokio = { version = "1", optional = true, features = ["rt"] }
wgpu = { version = "28.0", optional = true }

# Dependencies used only when building tests, examples and benchmarks;
# sorted alphabetically.
[dev-dependencies]
approx = "0.5"
fluxbench = "0.1"
nalgebra = "0.33"
ndarray = "0.16"
rand = "0.9"

# Benchmark targets. Each sets `harness = false`, which opts out of the
# built-in libtest bench harness, so each benchmark supplies its own entry
# point.
# NOTE(review): presumably the entry point comes from the `fluxbench`
# dev-dependency - confirm.
[[bench]]
name = "matmul"
harness = false

[[bench]]
name = "reduce"
harness = false

[[bench]]
name = "fft"
harness = false

[[bench]]
name = "indexing"
harness = false

[[bench]]
name = "shape_ops"
harness = false

[[bench]]
name = "parallelism"
harness = false

[[bench]]
name = "ci_regression"
harness = false

# Release builds: thin LTO and a single codegen unit.
[profile.release]
codegen-units = 1
lto = "thin"

# Benchmarks are built with the same LTO and codegen settings as release.
[profile.bench]
codegen-units = 1
lto = "thin"