realizar 0.8.5

Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors
1
2
3
4
5
6
7
8
9
10
11
12
13
14
//! CUDA Graph-captured forward pass operations
//!
//! Extracted from layer.rs (PMAT-802) to reduce module size.
//! Contains:
//! - PAR-054: forward_all_layers_gpu_to_logits_graphed
//! - PAR-062: gpu_argmax
//! - PAR-062: forward_graphed_replay_to_token_id

// Silence clippy's wildcard-import lint for the glob `use` below.
#![allow(clippy::wildcard_imports)]

// Glob-import every item visible from the module two levels up.
use super::super::*;

// `include!` pastes each file's contents into this module at compile time
// (paths resolve relative to this file), so the included code shares this
// module's scope, including the glob import above.
// NOTE(review): these files presumably hold the graphed forward-pass
// implementations listed in the module docs — confirm against their contents.
include!("forward_workspace_captured.rs");
include!("graphed_03.rs");