pub struct Config { // 27 fields
pub model_path: String,
pub llama_bin: String,
pub llama_host: String,
pub llama_port: u16,
pub ctx_size: u32,
pub batch_size: u32,
pub threads: u32,
pub gpu_layers: u32,
pub health_timeout_seconds: u64,
pub hot_swap_grace_seconds: u64,
pub max_concurrent_streams: u32,
pub parallel_slots: u32,
pub ubatch_size: u32,
pub prometheus_port: u16,
pub api_host: String,
pub api_port: u16,
pub requests_per_second: u32,
pub generate_timeout_seconds: u64,
pub stream_timeout_seconds: u64,
pub health_check_timeout_seconds: u64,
pub queue_size: usize,
pub queue_timeout_seconds: u64,
pub backend_url: String,
pub openrouter_api_key: String,
pub draft_model_path: String,
pub speculative_draft_max: u32,
pub speculative_draft_p_min: f32,
}

Fields§
§model_path: String
§llama_bin: String
§llama_host: String
§llama_port: u16
§ctx_size: u32
§batch_size: u32
§threads: u32
§gpu_layers: u32
§health_timeout_seconds: u64
§hot_swap_grace_seconds: u64
§max_concurrent_streams: u32
§parallel_slots: u32
§ubatch_size: u32
§prometheus_port: u16
§api_host: String
§api_port: u16
§requests_per_second: u32
§generate_timeout_seconds: u64
§stream_timeout_seconds: u64
§health_check_timeout_seconds: u64
§queue_size: usize
§queue_timeout_seconds: u64
§backend_url: String
§openrouter_api_key: String
§draft_model_path: String
Path to the draft model for speculative decoding (empty string = disabled). Set DRAFT_MODEL_PATH in .env to enable. The draft model should be a smaller version of the main model (e.g. 0.5B for a 3B main model).
§speculative_draft_max: u32
Maximum number of draft tokens the draft model generates per step. Higher values increase throughput gains but reduce acceptance rate. Maps to llama-server `--draft-max`. Default: 8.
§speculative_draft_p_min: f32
Minimum acceptance probability for a draft token to be kept. Tokens below this threshold are rejected early. Default: 0.4.
Implementations§
Trait Implementations§
Auto Trait Implementations§
impl Freeze for Config
impl RefUnwindSafe for Config
impl Send for Config
impl Sync for Config
impl Unpin for Config
impl UnsafeUnpin for Config
impl UnwindSafe for Config
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more