# Crate manifest for the weight-format abstraction layer.
# Keys written as `<key>.workspace = true` are inherited from the workspace
# root manifest instead of being set per crate.
[package]
name = "ferrum-quantization"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
# Path is relative to this manifest: the README lives two directories up
# (presumably the repository root; verify if the crate is ever moved).
readme = "../../README.md"
description = "Weight-format abstraction (Dense / GPTQ / AWQ / GGUF) for Ferrum models"

# Runtime dependencies, sorted alphabetically.
# Entries marked `workspace = true` take their version (and any other settings)
# from the workspace root; only `memmap2` carries its own version requirement.
[dependencies]
candle-core = { workspace = true }
ferrum-kernels = { workspace = true }
ferrum-types = { workspace = true }
half = { workspace = true }
# NOTE(review): versioned locally rather than via the workspace; consider
# hoisting into the workspace manifest if other crates also depend on it.
memmap2 = "0.9"
safetensors = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }

# Dependencies used only when building tests, examples, and benchmarks.
[dev-dependencies]
# Versioned locally rather than inherited from the workspace.
tempfile = "3.8"

# Optional features. Nothing is enabled by default; each backend feature below
# forwards to the same-named feature of `ferrum-kernels`.
[features]
default = []
cuda = ["ferrum-kernels/cuda"]
metal = ["ferrum-kernels/metal"]
# Also turns on this crate's `cuda` feature in addition to the
# `ferrum-kernels/vllm-moe-marlin` feature.
vllm-moe-marlin = ["cuda", "ferrum-kernels/vllm-moe-marlin"]