realizar 0.8.5

Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors
1
2
3
4
5
6
7
8
9
10
11
12
//! Q4K quantized GEMV operations
//!
//! This module implements Q4_K dequantization and matrix-vector multiplication
//! for efficient inference with 4-bit quantized weights.
//!
//! This file defines no items of its own: the module body is assembled from
//! the files pulled in by the `include!` invocations at the bottom.

// The glob import of the parent module below (`use super::*`) would otherwise
// trip `clippy::wildcard_imports`.
#![allow(clippy::wildcard_imports)]
// NOTE(review): presumably required by functions in the included files that
// take many parameters; nothing in this file itself needs it — confirm.
#![allow(clippy::too_many_arguments)]

// Bring every item of the parent module into scope. Because `include!` splices
// the files below into this module, they resolve names through this import too.
use super::*;

// `include!` paths are resolved relative to this source file, and the included
// items become part of this module (sharing the lint attributes and import above).
include!("q4k_tiled_gemv.rs");
// NOTE(review): the file name suggests test code, yet it is included without a
// visible `#[cfg(test)]` gate here; presumably the file gates itself — confirm
// so that tests are not compiled into non-test builds.
include!("q4k_tests_gemv_cached.rs");