# ferrum-kernels 0.7.7
#
# Unified compute kernels (CUDA/Metal/CPU) and model runner for Ferrum inference
# Documentation
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
name = "ferrum-kernels"
version = "0.7.7"
authors = ["Ferrum Team"]
edition = "2021"
license = "MIT"
readme = "README.md"
build = "build.rs"
# Target auto-discovery is switched off for every target kind, so the library
# and test targets are the ones declared explicitly in this manifest.
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Unified compute kernels (CUDA/Metal/CPU) and model runner for Ferrum inference"

[features]
# Nothing is enabled by default; every backend is opt-in.
default = []
# Enables candle-core's `cuda` feature, the optional `cudarc` dependency
# (plus its `nccl` feature), and this crate's `marlin` feature.
cuda = ["candle-core/cuda", "dep:cudarc", "marlin", "cudarc?/nccl"]
# Enables nothing else in this manifest.
marlin = []
# Enables the optional `metal` dependency.
metal = ["dep:metal"]
# CUDA-dependent flags: each one enables `cuda` (directly or through another
# flag) and none of them pulls in an additional dependency.
fa2-source = ["cuda"]
tensor-parallel = ["cuda"]
triton-kernels = ["cuda"]
vllm-marlin = ["cuda"]
vllm-moe-marlin = ["cuda", "vllm-marlin"]
vllm-paged-attn-v2 = ["cuda"]

# Library target, declared explicitly because `autolib = false` in [package].
[lib]
path = "src/lib.rs"
name = "ferrum_kernels"

# Integration test targets. They are listed one by one because
# `autotests = false` in [package] disables discovery of files under tests/.
# Each entry maps a target name to its source file.
[[test]]
name = "attention_gemm_test"
path = "tests/attention_gemm_test.rs"

# NOTE(review): no `required-features` is set here; presumably the source file
# gates itself on the `metal` feature — confirm.
[[test]]
name = "attention_metal_test"
path = "tests/attention_metal_test.rs"

[[test]]
name = "attention_paged_attention_test"
path = "tests/attention_paged_attention_test.rs"

[[test]]
name = "attention_paged_kv_append_test"
path = "tests/attention_paged_kv_append_test.rs"

[[test]]
name = "attention_transformer_test"
path = "tests/attention_transformer_test.rs"

# NOTE(review): no `required-features` is set here; presumably the source file
# gates itself on the `cuda` feature — confirm.
[[test]]
name = "cudarc_graph_repro"
path = "tests/cudarc_graph_repro.rs"

[[test]]
name = "flash_attn_batched_eq"
path = "tests/flash_attn_batched_eq.rs"

[[test]]
name = "int8_kv_parity"
path = "tests/int8_kv_parity.rs"

[[test]]
name = "kv_cache_append_batched_eq"
path = "tests/kv_cache_append_batched_eq.rs"

[[test]]
name = "moe_align_block_size_eq"
path = "tests/moe_align_block_size_eq.rs"

# NOTE(review): named like a benchmark but declared as a test target —
# confirm this is intended.
[[test]]
name = "paged_decode_attn_bench"
path = "tests/paged_decode_attn_bench.rs"

[[test]]
name = "qk_norm_rope_batched_eq"
path = "tests/qk_norm_rope_batched_eq.rs"

[[test]]
name = "triton_add_bias_eq"
path = "tests/triton_add_bias_eq.rs"

[[test]]
name = "triton_fused_add_rms_norm_eq"
path = "tests/triton_fused_add_rms_norm_eq.rs"

# NOTE(review): named like a benchmark but declared as a test target —
# confirm this is intended.
[[test]]
name = "triton_fused_moe_bench"
path = "tests/triton_fused_moe_bench.rs"

[[test]]
name = "triton_fused_moe_eq"
path = "tests/triton_fused_moe_eq.rs"

[[test]]
name = "triton_fused_moe_tile_sweep"
path = "tests/triton_fused_moe_tile_sweep.rs"

[[test]]
name = "triton_fused_silu_mul_eq"
path = "tests/triton_fused_silu_mul_eq.rs"

[[test]]
name = "triton_gelu_eq"
path = "tests/triton_gelu_eq.rs"

[[test]]
name = "triton_layer_norm_eq"
path = "tests/triton_layer_norm_eq.rs"

[[test]]
name = "triton_residual_add_eq"
path = "tests/triton_residual_add_eq.rs"

[[test]]
name = "triton_residual_add_inplace_eq"
path = "tests/triton_residual_add_inplace_eq.rs"

[[test]]
name = "triton_rms_norm_eq"
path = "tests/triton_rms_norm_eq.rs"

[[test]]
name = "triton_softmax_eq"
path = "tests/triton_softmax_eq.rs"

[[test]]
name = "triton_w4a16_eq"
path = "tests/triton_w4a16_eq.rs"

[[test]]
name = "vllm_gptq_repack_smoke"
path = "tests/vllm_gptq_repack_smoke.rs"

[dependencies]
candle-core = { version = "0.9.2" }
ferrum-bench-core = { version = "0.7.7" }
ferrum-interfaces = { version = "0.7.7" }
ferrum-types = { version = "0.7.7" }
# Optional: enabled by the `metal` feature (`dep:metal`).
metal = { version = "0.31", optional = true }
rayon = { version = "1.11" }
serde_json = { version = "1.0" }
tracing = { version = "0.1" }

# Optional: enabled by the `cuda` feature (`dep:cudarc`), which also turns on
# cudarc's `nccl` feature in addition to the ones listed here.
[dependencies.cudarc]
version = "0.19"
optional = true
default-features = false
features = ["driver", "cublas", "f16", "nvrtc", "cuda-version-from-build-system"]

[dependencies.half]
version = "2.5.0"
features = ["num-traits", "use-intrinsics", "rand_distr"]

# Dependencies available to the build script (`build = "build.rs"`).
[build-dependencies]
bindgen_cuda = { version = "0.1.6" }