# Crate metadata published to the registry.
[package]
name = "rage-quant"
version = "0.1.0"
authors = ["Carlos Enrique Castro Lazaro <the@angriestboy.com>"]
# Target auto-discovery is switched off for every target kind, so only the
# targets declared explicitly in this manifest are built.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is run for this package.
build = false
categories = ["science", "algorithms"]
documentation = "https://docs.rs/rage-quant"
edition = "2021"
homepage = "https://github.com/OnCeUponTry/rage-quant"
keywords = ["llm", "inference", "quantization", "simd", "gguf"]
license = "AGPL-3.0-only"
readme = "README.md"
repository = "https://github.com/OnCeUponTry/rage-quant"
description = "High-performance quantized GEMV kernels for CPU-only LLM inference. Direct dot product on Q8_0/Q6_K/Q4_K GGUF blocks with AVX2+FMA SIMD — 3.0x decode speedup."
# Library target, declared explicitly because `autolib` is disabled in [package].
[lib]
path = "src/lib.rs"
name = "rage_quant"
# Benchmark target. `harness = false` turns off the built-in libtest bench
# harness, so the benchmark file must provide its own entry point
# (presumably via the `criterion` dev-dependency — confirm in the bench source).
[[bench]]
harness = false
name = "gemv_benchmark"
path = "benches/gemv_benchmark.rs"
# Runtime dependencies. A bare version string is shorthand for a table with
# only a `version` key (implicit caret requirement).
[dependencies]
anyhow = "1.0"
gemm = "0.19"
half = "2.4"
rayon = "1.10"
# Dependencies used only by tests, examples and benchmarks.
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }