# ferrum-quantization 0.7.7
#
# Weight-format abstraction (Dense / GPTQ / AWQ / GGUF) for Ferrum models
# Documentation
[package]
name = "ferrum-quantization"
# version, authors, edition and license are inherited from the workspace manifest.
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
# Relative path: resolves to a README two directories above this manifest.
readme = "../../README.md"
description = "Weight-format abstraction (Dense / GPTQ / AWQ / GGUF) for Ferrum models"

[dependencies]
# Entries marked `workspace = true` take their version (and any other settings)
# from the workspace manifest; kept in alphabetical order.
candle-core.workspace = true
ferrum-kernels.workspace = true
ferrum-types.workspace = true
half.workspace = true
# Pinned here rather than inherited from the workspace.
memmap2 = "0.9"
safetensors.workspace = true
serde.workspace = true
serde_json.workspace = true
tracing.workspace = true

[dev-dependencies]
# Only pulled in when building tests, examples, and benchmarks.
# Pinned here rather than inherited from the workspace.
tempfile = "3.8"

[features]
# No features are enabled by default.
default = []
# Each of these forwards to the same-named feature of ferrum-kernels.
cuda = ["ferrum-kernels/cuda"]
metal = ["ferrum-kernels/metal"]
# Enables this crate's `cuda` feature in addition to the ferrum-kernels
# feature of the same name.
vllm-moe-marlin = [
    "cuda",
    "ferrum-kernels/vllm-moe-marlin",
]