forgellm-runtime 0.7.6

Minimal runtime for ForgeLLM (KV cache, sampling, tokenizer, API server)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
//! ForgeLLM Runtime — minimal inference runtime.
//!
//! Provides KV cache management, token sampling,
//! and tokenizer integration for compiled models.

pub mod chat;
pub mod interpreter;
pub mod kernels;
pub mod kv_cache;
pub mod sampling;
pub mod tokenizer;

/// Returns the version string of this crate.
///
/// The value is read from the `CARGO_PKG_VERSION` environment variable at
/// compile time through the `env!` macro, so the returned string is baked
/// into the binary and calling this function performs no runtime lookup.
pub fn version() -> &'static str {
    // Captured once at build time; `env!` fails the build if the variable is unset.
    const PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
    PKG_VERSION
}