llama-rs 0.16.1

A high-performance Rust implementation of llama.cpp - LLM inference engine with full GGUF support
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
//! Quantization formats and dequantization functions
//!
//! This file only wires submodules together. Every submodule is declared
//! privately, and the public items of `blocks`, `dequant`, `gpu_quantize`
//! and `iq_quants` are glob re-exported, so callers name those items through
//! this module's path rather than through the individual submodules.
//! `iq_grids` is declared here but is not re-exported from this file.

// Private submodules; their contents are not visible from this file.
mod blocks;
mod dequant;
mod gpu_quantize;
// NOTE(review): the only submodule without a matching `pub use` below —
// presumably internal data consumed by `iq_quants`; confirm before exposing.
mod iq_grids;
mod iq_quants;

// Flatten the submodules' public items into this module's namespace.
// Keep exported names unique across these four submodules: a name supplied
// by more than one glob import is ambiguous at the point where it is used.
pub use blocks::*;
pub use dequant::*;
pub use gpu_quantize::*;
pub use iq_quants::*;