# Crate metadata. Keys follow the Rust Style Guide order: `name`, `version`,
# the remaining keys alphabetically, and `description` last.
[package]
name = "rage-quant"
version = "0.1.0"
authors = ["Carlos Enrique Castro Lazaro <the@angriestboy.com>"]
categories = ["science", "algorithms"]
documentation = "https://docs.rs/rage-quant"
edition = "2021"
homepage = "https://github.com/OnCeUponTry/rage-quant"
keywords = ["llm", "inference", "quantization", "simd", "gguf"]
license = "AGPL-3.0-only"
readme = "README.md"
repository = "https://github.com/OnCeUponTry/rage-quant"
description = "High-performance quantized GEMV kernels for CPU-only LLM inference. Direct dot product on Q8_0/Q6_K/Q4_K GGUF blocks with AVX2+FMA SIMD — 3.0x decode speedup."

# Runtime dependencies, sorted alphabetically. Bare version strings are
# caret requirements (e.g. "1.0" means >=1.0.0, <2.0.0).
[dependencies]
anyhow = "1.0"
gemm = "0.19"
half = "2.4"
rayon = "1.10"

# Only built for tests, examples, and benchmarks; not part of the published
# crate's runtime dependency graph.
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }

# Benchmark target `gemv_benchmark`.
[[bench]]
name = "gemv_benchmark"
# Disable the built-in libtest harness so the benchmark supplies its own
# `main` (presumably generated by Criterion — confirm against the bench source).
harness = false