realizar 0.8.5

Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
//! Multi-stream async execution and GEMM/GEMV operations
//!
//! This module implements:
//! - PARITY-038: Multi-Stream Async Execution
//! - GEMM operations (tiled, optimized, fused)
//! - GEMV operations for M=1 token generation
//! - Softmax kernel
//! - Q4K/Q5K/Q6K GEMV with direct weight transfer

#![allow(clippy::wildcard_imports)] // Internal module organization uses super::*

use super::*;

// `include!` splices the named file's source into this module at this exact
// point, so the included items live in this module's scope and see the
// `use super::*` glob import above. Paths are resolved relative to this file.
// NOTE(review): judging by its name, `fused.rs` presumably holds the fused
// kernels mentioned in the module docs — confirm against the file.
include!("fused.rs");
// NOTE(review): presumably the test suite for this module. No `#[cfg(test)]`
// gate is visible at this include site, so verify that `gemm_tests.rs` gates
// its own contents; otherwise the tests are compiled into non-test builds.
include!("gemm_tests.rs");