# burn_dragon_kernel 0.5.0

# Fused GPU kernel crate for burn_dragon execution paths
# Documentation
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
name = "burn_dragon_kernel"
version = "0.5.0"
authors = ["mosure <mitchell@mosure.me>"]
# Automatic target discovery is turned off for every target kind; the lib,
# bin, and test targets are all declared explicitly further down.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script.
build = false
categories = ["science", "science::neuroscience"]
edition = "2024"
# NOTE(review): homepage and repository both point at `burn_bdh` while the
# crate is named `burn_dragon_kernel` — confirm the URL is still current.
homepage = "https://github.com/mosure/burn_bdh"
keywords = ["burn", "cuda", "kernel", "recurrent"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/mosure/burn_bdh"
rust-version = "1.92"
description = "Fused GPU kernel crate for burn_dragon execution paths"

[features]
# Nothing is enabled by default; CUDA support is opt-in.
default = []
# Activates the two optional dependencies `burn-cuda` and `cudarc`.
cuda = ["dep:burn-cuda", "dep:cudarc"]

[lib]
# Library target, declared explicitly and built as a plain `rlib`.
path = "src/lib.rs"
name = "burn_dragon_kernel"
crate-type = ["rlib"]

# Binary targets. Each one is declared explicitly with its source file under
# src/bin/, because automatic binary discovery is switched off
# (`autobins = false` in [package]).
# NOTE(review): the binaries with `cuda` in their name carry no
# `required-features` key here — confirm they still compile when the optional
# `cuda` feature is disabled.
[[bin]]
name = "dense_causal_attention_autodiff_cuda_bench"
path = "src/bin/dense_causal_attention_autodiff_cuda_bench.rs"

[[bin]]
name = "dense_causal_attention_bench"
path = "src/bin/dense_causal_attention_bench.rs"

[[bin]]
name = "low_bit_bench"
path = "src/bin/low_bit_bench.rs"

[[bin]]
name = "low_bit_cuda_bench"
path = "src/bin/low_bit_cuda_bench.rs"

[[bin]]
name = "mamba2_bench"
path = "src/bin/mamba2_bench.rs"

[[bin]]
name = "mamba2_cuda_bench"
path = "src/bin/mamba2_cuda_bench.rs"

[[bin]]
name = "mamba3_bench"
path = "src/bin/mamba3_bench.rs"

[[bin]]
name = "mamba_bench"
path = "src/bin/mamba_bench.rs"

[[bin]]
name = "recurrent_autodiff_cuda_bench"
path = "src/bin/recurrent_autodiff_cuda_bench.rs"

[[bin]]
name = "recurrent_autodiff_wgpu_bench"
path = "src/bin/recurrent_autodiff_wgpu_bench.rs"

[[bin]]
name = "recurrent_bench"
path = "src/bin/recurrent_bench.rs"

[[bin]]
name = "recurrent_cuda_bench"
path = "src/bin/recurrent_cuda_bench.rs"

[[bin]]
name = "rwkv8_bench"
path = "src/bin/rwkv8_bench.rs"

[[bin]]
name = "rwkv8_cuda_bench"
path = "src/bin/rwkv8_cuda_bench.rs"

# Integration test target, declared explicitly because automatic test
# discovery is switched off (`autotests = false` in [package]).
[[test]]
name = "low_bit_fused"
path = "tests/low_bit_fused.rs"

# Runtime dependencies. Every burn-family crate requests the same
# 0.21.0-pre.3 pre-release.
[dependencies]
burn = { version = "0.21.0-pre.3", default-features = false, features = ["fusion", "ndarray", "std", "wgpu"] }
burn-autodiff = { version = "0.21.0-pre.3", default-features = false }
burn-cubecl = { version = "0.21.0-pre.3", default-features = false, features = ["std", "fusion"] }
# Optional: activated by the `cuda` feature.
burn-cuda = { version = "0.21.0-pre.3", default-features = false, features = ["std"], optional = true }
burn-fusion = { version = "0.21.0-pre.3", default-features = false, features = ["std"] }
burn-ir = { version = "0.21.0-pre.3" }
burn-std = { version = "0.21.0-pre.3" }
burn-wgpu = { version = "0.21.0-pre.3", default-features = false, features = ["fusion", "std", "template"] }
# Optional: activated by the `cuda` feature.
cudarc = { version = "0.17.8", features = ["cuda-version-from-build-system"], optional = true }
pollster = { version = "0.3" }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1" }
wgpu = { version = "26" }

# Used only when building tests, examples, and benchmarks.
[dev-dependencies]
burn-ndarray = { version = "0.21.0-pre.3" }