pub struct RuntimeConfig {
pub model_path: PathBuf,
pub format: ModelFormat,
pub host: String,
pub port: u16,
pub context_size: u32,
pub batch_size: u32,
pub threads: u32,
pub gpu_layers: u32,
pub parallel_slots: u32,
pub ubatch_size: u32,
pub runtime_binary: Option<PathBuf>,
pub draft_model_path: Option<PathBuf>,
pub speculative_draft_max: u32,
pub speculative_draft_p_min: f32,
pub extra_config: Value,
}
Runtime configuration for model initialization
Fields§
model_path: PathBuf — Path to model file
format: ModelFormat — Model format
host: String — Host for runtime server (e.g., "127.0.0.1")
port: u16 — Port for runtime server (e.g., 8001)
context_size: u32 — Context size
batch_size: u32 — Batch size
threads: u32 — Number of CPU threads
gpu_layers: u32 — GPU layers to offload (0 = CPU only)
parallel_slots: u32 — Number of parallel KV-cache slots (continuous batching slots). Should match MAX_CONCURRENT_STREAMS. Maps to llama-server `--parallel N`.
ubatch_size: u32 — Micro-batch size for GPU compute. Larger values increase tensor-core utilisation. Maps to llama-server `--ubatch-size N`.
runtime_binary: Option<PathBuf> — Path to runtime binary (e.g., llama-server.exe)
draft_model_path: Option<PathBuf> — Path to draft model for speculative decoding. None = disabled. Maps to llama-server `--model-draft`.
speculative_draft_max: u32 — Maximum draft tokens generated per speculative step. Maps to llama-server `--draft-max`. Default: 8.
speculative_draft_p_min: f32 — Minimum acceptance probability for a draft token. Maps to llama-server `--draft-p-min`. Default: 0.4.
extra_config: Value — Additional runtime-specific configuration
Trait Implementations§
Source§impl Clone for RuntimeConfig
impl Clone for RuntimeConfig
Source§fn clone(&self) -> RuntimeConfig
fn clone(&self) -> RuntimeConfig
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for RuntimeConfig
impl Debug for RuntimeConfig
Source§impl Default for RuntimeConfig
impl Default for RuntimeConfig
Source§impl<'de> Deserialize<'de> for RuntimeConfig
impl<'de> Deserialize<'de> for RuntimeConfig
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Auto Trait Implementations§
impl Freeze for RuntimeConfig
impl RefUnwindSafe for RuntimeConfig
impl Send for RuntimeConfig
impl Sync for RuntimeConfig
impl Unpin for RuntimeConfig
impl UnsafeUnpin for RuntimeConfig
impl UnwindSafe for RuntimeConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more