realizar 0.8.4

Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
//! Token generation for `OwnedQuantizedModel`.
//!
//! Contains `generate`, `generate_with_cache`, `generate_with_cache_streaming`,
//! `generate_with_scratch`, and the sampling methods.

use crate::brick::BrickProfiler;
use crate::error::{RealizarError, Result};
use crate::gguf::ops;
#[cfg(feature = "gpu")]
use crate::gguf::DispatchMetrics;
use crate::gguf::{
    InferenceScratchBuffer, OwnedQuantizedKVCache, OwnedQuantizedModel, QuantizedGenerateConfig,
};
use rand::Rng;

// `include!` pastes each named file's source into this module at this point
// (paths are resolved relative to this file), so the included items are
// compiled as part of this module and can use the `use` imports above.
// NOTE(review): the included files are not visible here; judging by their
// names, `generate_scratch.rs` presumably holds the scratch-buffer generation
// path and `generation_argmax_basic.rs` the basic argmax sampling — confirm.
include!("generate_scratch.rs");
include!("generation_argmax_basic.rs");