# Crate metadata for `ferrum-kernels`.
[package]
name = "ferrum-kernels"
version = "0.7.7"
authors = ["Ferrum Team"]
edition = "2021"
license = "MIT"
readme = "README.md"
build = "build.rs"
# Cargo's target auto-discovery is switched off for every target kind, so
# only the targets declared explicitly in this manifest are built.
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Unified compute kernels (CUDA/Metal/CPU) and model runner for Ferrum inference"

# Opt-in capabilities. Nothing is enabled by default.
[features]
default = []
# CUDA backend: forwards to candle-core's `cuda` feature, pulls in the
# optional `cudarc` dependency (with its `nccl` feature), and turns on `marlin`.
cuda = [
    "candle-core/cuda",
    "dep:cudarc",
    "marlin",
    "cudarc?/nccl",
]
# Flag-only feature: enables no dependency or other feature by itself.
marlin = []
# Pulls in the optional `metal` dependency.
# NOTE(review): unlike `cuda`, this does not forward to a candle-core
# feature — confirm that is intentional.
metal = ["dep:metal"]
# Every feature below implies `cuda`.
fa2-source = ["cuda"]
tensor-parallel = ["cuda"]
triton-kernels = ["cuda"]
vllm-marlin = ["cuda"]
vllm-moe-marlin = ["cuda", "vllm-marlin"]
vllm-paged-attn-v2 = ["cuda"]

# Library target; the crate is imported as `ferrum_kernels`.
[lib]
name = "ferrum_kernels"
path = "src/lib.rs"

# Integration-test targets, one `[[test]]` entry per file under `tests/`.
# They are listed explicitly because `autotests = false` in `[package]`.
# NOTE(review): no entry declares `required-features`, although several names
# look backend-specific (metal / cudarc / triton / vllm). Presumably the test
# sources gate themselves with `cfg` attributes — confirm.

# attention_* suites
[[test]]
name = "attention_gemm_test"
path = "tests/attention_gemm_test.rs"

[[test]]
name = "attention_metal_test"
path = "tests/attention_metal_test.rs"

[[test]]
name = "attention_paged_attention_test"
path = "tests/attention_paged_attention_test.rs"

[[test]]
name = "attention_paged_kv_append_test"
path = "tests/attention_paged_kv_append_test.rs"

[[test]]
name = "attention_transformer_test"
path = "tests/attention_transformer_test.rs"

# Miscellaneous kernel suites
[[test]]
name = "cudarc_graph_repro"
path = "tests/cudarc_graph_repro.rs"

[[test]]
name = "flash_attn_batched_eq"
path = "tests/flash_attn_batched_eq.rs"

[[test]]
name = "int8_kv_parity"
path = "tests/int8_kv_parity.rs"

[[test]]
name = "kv_cache_append_batched_eq"
path = "tests/kv_cache_append_batched_eq.rs"

[[test]]
name = "moe_align_block_size_eq"
path = "tests/moe_align_block_size_eq.rs"

[[test]]
name = "paged_decode_attn_bench"
path = "tests/paged_decode_attn_bench.rs"

[[test]]
name = "qk_norm_rope_batched_eq"
path = "tests/qk_norm_rope_batched_eq.rs"

# triton_* suites
[[test]]
name = "triton_add_bias_eq"
path = "tests/triton_add_bias_eq.rs"

[[test]]
name = "triton_fused_add_rms_norm_eq"
path = "tests/triton_fused_add_rms_norm_eq.rs"

[[test]]
name = "triton_fused_moe_bench"
path = "tests/triton_fused_moe_bench.rs"

[[test]]
name = "triton_fused_moe_eq"
path = "tests/triton_fused_moe_eq.rs"

[[test]]
name = "triton_fused_moe_tile_sweep"
path = "tests/triton_fused_moe_tile_sweep.rs"

[[test]]
name = "triton_fused_silu_mul_eq"
path = "tests/triton_fused_silu_mul_eq.rs"

[[test]]
name = "triton_gelu_eq"
path = "tests/triton_gelu_eq.rs"

[[test]]
name = "triton_layer_norm_eq"
path = "tests/triton_layer_norm_eq.rs"

[[test]]
name = "triton_residual_add_eq"
path = "tests/triton_residual_add_eq.rs"

[[test]]
name = "triton_residual_add_inplace_eq"
path = "tests/triton_residual_add_inplace_eq.rs"

[[test]]
name = "triton_rms_norm_eq"
path = "tests/triton_rms_norm_eq.rs"

[[test]]
name = "triton_softmax_eq"
path = "tests/triton_softmax_eq.rs"

[[test]]
name = "triton_w4a16_eq"
path = "tests/triton_w4a16_eq.rs"

# vllm_* suites
[[test]]
name = "vllm_gptq_repack_smoke"
path = "tests/vllm_gptq_repack_smoke.rs"

# Runtime dependencies. `cudarc` and `metal` are optional; they are only
# activated through the `cuda` and `metal` features respectively.
[dependencies]
candle-core = "0.9.2"
ferrum-bench-core = "0.7.7"
ferrum-interfaces = "0.7.7"
ferrum-types = "0.7.7"
half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_distr"] }
metal = { version = "0.31", optional = true }
rayon = "1.11"
serde_json = "1.0"
tracing = "0.1"

# Kept as its own table because the feature list is too long for one line.
# Default features are off; only the listed cudarc features are requested here
# (the `cuda` feature additionally requests `nccl`).
[dependencies.cudarc]
version = "0.19"
default-features = false
optional = true
features = [
    "driver",
    "cublas",
    "f16",
    "nvrtc",
    "cuda-version-from-build-system",
]

# Dependencies available to the build script (`build = "build.rs"`).
# NOTE(review): `bindgen_cuda` is not optional, so it is compiled for every
# feature set, including builds without `cuda` — confirm that is intended.
[build-dependencies]
bindgen_cuda = "0.1.6"