# rage-quant 0.1.0
#
# High-performance quantized GEMV kernels for CPU-only LLM inference. Direct dot product on Q8_0/Q6_K/Q4_K GGUF blocks with AVX2+FMA SIMD — 3.0x decode speedup.
# Documentation
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

# Crate metadata published to the registry.
# Key order: `name`, `version`, remaining keys alphabetically, `description` last.
[package]
name = "rage-quant"
version = "0.1.0"
authors = ["Carlos Enrique Castro Lazaro <the@angriestboy.com>"]
# Target auto-discovery is turned off; the library and benchmark targets are
# declared explicitly in the `[lib]` and `[[bench]]` sections of this file.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# `build = false` tells Cargo not to look for a build script.
build = false
categories = ["science", "algorithms"]
documentation = "https://docs.rs/rage-quant"
edition = "2021"
# NOTE(review): same URL as `repository`. The Rust API Guidelines (C-METADATA)
# suggest setting `homepage` only when a dedicated site exists — consider
# dropping it in Cargo.toml.orig.
homepage = "https://github.com/OnCeUponTry/rage-quant"
keywords = ["llm", "inference", "quantization", "simd", "gguf"]
license = "AGPL-3.0-only"
readme = "README.md"
repository = "https://github.com/OnCeUponTry/rage-quant"
description = "High-performance quantized GEMV kernels for CPU-only LLM inference. Direct dot product on Q8_0/Q6_K/Q4_K GGUF blocks with AVX2+FMA SIMD — 3.0x decode speedup."

# Library target, declared explicitly because `autolib = false` in `[package]`.
# The crate name uses an underscore (`rage_quant`) while the package name uses
# a hyphen (`rage-quant`).
[lib]
path = "src/lib.rs"
name = "rage_quant"

# Benchmark target, declared explicitly because `autobenches = false` in `[package]`.
[[bench]]
# Disable the built-in libtest harness so the benchmark file supplies its own
# entry point (presumably Criterion, the only dev-dependency — confirm in the
# bench source).
harness = false
path = "benches/gemv_benchmark.rs"
name = "gemv_benchmark"

# Runtime dependencies, sorted alphabetically. A bare version string is the
# shorthand for a table containing only `version`.
[dependencies]
anyhow = "1.0"
gemm = "0.19"
half = "2.4"
rayon = "1.10"

# Dependencies used only for tests, examples and benchmarks.
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }