Skip to main content

ModelSettings

Struct ModelSettings 

Source
pub struct ModelSettings {
    // 81 fields
    pub context_length: u32,
    pub threads: u32,
    pub threads_batch: u32,
    pub batch_size: u32,
    pub ubatch_size: u32,
    pub parallel: u32,
    pub max_concurrent_predictions: Option<u32>,
    pub uniform_cache: bool,
    pub kv_cache_offload: bool,
    pub cache_type_k: Option<CacheTypeK>,
    pub cache_type_v: Option<CacheTypeV>,
    pub keep: i32,
    pub swa_full: bool,
    pub mlock: bool,
    pub mmap: bool,
    pub numa: NumMode,
    pub system_prompt: String,
    pub system_prompt_preset_name: String,
    pub gpu_layers_mode: GpuLayersMode,
    pub split_mode: SplitMode,
    pub tensor_split: String,
    pub main_gpu: i32,
    pub fit: bool,
    pub lora: Option<PathBuf>,
    pub lora_scaled: Option<(PathBuf, f32)>,
    pub rpc: String,
    pub embedding: bool,
    pub flash_attn: bool,
    pub expert_count: i32,
    pub jinja: bool,
    pub chat_template: Option<String>,
    pub chat_template_kwargs: Option<String>,
    pub seed: i32,
    pub temperature: f32,
    pub top_k: i32,
    pub top_p: f32,
    pub min_p: f32,
    pub typical_p: f32,
    pub mirostat: Mirostat,
    pub mirostat_lr: f32,
    pub mirostat_ent: f32,
    pub ignore_eos: bool,
    pub samplers: Samplers,
    pub repeat_penalty: f32,
    pub repeat_last_n: i32,
    pub presence_penalty: Option<f32>,
    pub frequency_penalty: Option<f32>,
    pub dry_multiplier: f32,
    pub dry_base: f32,
    pub dry_allowed_length: i32,
    pub dry_penalty_last_n: i32,
    pub rope_scaling: RopeScaling,
    pub rope_scale: f32,
    pub rope_freq_base: f32,
    pub rope_freq_scale: f32,
    pub rope_yarn_enabled: bool,
    pub host: String,
    pub port: u16,
    pub timeout: u32,
    pub cache_prompt: bool,
    pub cache_reuse: u32,
    pub webui: bool,
    pub max_tokens: Option<u32>,
    pub cache_type: CacheType,
    pub backend: Backend,
    pub llama_cpp_version_cpu: Option<String>,
    pub llama_cpp_version_vulkan: Option<String>,
    pub llama_cpp_version_rocm: Option<String>,
    pub llama_cpp_version_rocm_lemonade: Option<String>,
    pub llama_cpp_version_cuda: Option<String>,
    pub api_endpoint_enabled: bool,
    pub api_endpoint_port: u16,
    pub spec_type: String,
    pub draft_tokens: u32,
    pub tags: Vec<String>,
    pub ws_server_enabled: bool,
    pub ws_server_port: u16,
    pub ws_server_auth_key: Option<String>,
    pub ws_server_tls_enabled: bool,
    pub ws_server_tls_cert: Option<String>,
    pub ws_server_tls_key: Option<String>,
}
Expand description

Settings for loading a model via llama.cpp server.

Fields§

§context_length: u32

Size of the prompt context.

§threads: u32

Number of CPU threads for generation.

§threads_batch: u32

Number of CPU threads for batch processing.

§batch_size: u32

Logical maximum batch size.

§ubatch_size: u32

Physical maximum batch size (micro-batch).

§parallel: u32

Max concurrent predictions (sequences).

§max_concurrent_predictions: Option<u32>

Max concurrent predictions (requests in flight). None means no --parallel argument is passed.

§uniform_cache: bool

Use uniform (unified) KV cache across all sequences.

§kv_cache_offload: bool

Offload KV cache to system RAM.

§cache_type_k: Option<CacheTypeK>

KV cache data type for K.

§cache_type_v: Option<CacheTypeV>

KV cache data type for V.

§keep: i32

Keep N tokens from the initial prompt.

§swa_full: bool

Use full-size SWA cache.

§mlock: bool

Force system to keep model in RAM.

§mmap: bool

Memory-map the model.

§numa: NumMode

NUMA optimization.

§system_prompt: String

System prompt.

§system_prompt_preset_name: String

Name of the system prompt preset currently selected.

§gpu_layers_mode: GpuLayersMode

GPU layer offloading mode.

§split_mode: SplitMode

Split mode across multiple GPUs.

§tensor_split: String

Fraction of model offloaded to each GPU (comma-separated).

§main_gpu: i32

Main GPU index.

§fit: bool

Whether to adjust arguments to fit device memory.

§lora: Option<PathBuf>

Path to LoRA adapter.

§lora_scaled: Option<(PathBuf, f32)>

Path to LoRA adapter with scale.

§rpc: String

RPC servers.

§embedding: bool

Restrict to embedding use case.

§flash_attn: bool

Enable Flash Attention.

§expert_count: i32

Active experts per token (MoE models, -1 = model default).

§jinja: bool

Use Jinja template engine for chat.

§chat_template: Option<String>

Custom chat template string.

§chat_template_kwargs: Option<String>

JSON string for --chat-template-kwargs (e.g. {"enable_thinking": false}).

§seed: i32

RNG seed (-1 = random).

§temperature: f32

Temperature.

§top_k: i32

Top-k sampling (0 = disabled).

§top_p: f32

Top-p sampling (1.0 = disabled).

§min_p: f32

Minimum probability for a token.

§typical_p: f32

Locally typical sampling parameter p.

§mirostat: Mirostat

Mirostat version (0=off, 1=Mirostat, 2=Mirostat2).

§mirostat_lr: f32

Mirostat learning rate (eta).

§mirostat_ent: f32

Mirostat target entropy (tau).

§ignore_eos: bool

Ignore end-of-stream token.

§samplers: Samplers

Sampler order string.

§repeat_penalty: f32

Penalty applied to repeated sequences of tokens.

§repeat_last_n: i32

Last N tokens to consider for repeat penalty.

§presence_penalty: Option<f32>

Repeat alpha presence penalty.

§frequency_penalty: Option<f32>

Repeat alpha frequency penalty.

§dry_multiplier: f32

DRY sampling multiplier.

§dry_base: f32

DRY sampling base value.

§dry_allowed_length: i32

DRY allowed length.

§dry_penalty_last_n: i32

DRY penalty last N.

§rope_scaling: RopeScaling

RoPE frequency scaling method.

§rope_scale: f32

RoPE context scaling factor.

§rope_freq_base: f32

RoPE base frequency.

§rope_freq_scale: f32

RoPE frequency scaling factor.

§rope_yarn_enabled: bool

Enable YaRN RoPE scaling mode.

§host: String

Host address.

§port: u16

Port.

§timeout: u32

Server timeout in seconds.

§cache_prompt: bool

Whether to enable prompt caching.

§cache_reuse: u32

Min chunk size for cache reuse.

§webui: bool

Whether to enable WebUI.

§max_tokens: Option<u32>

Max tokens to predict.

§cache_type: CacheType

Cache type (legacy, kept for compatibility).

§backend: Backend

Backend (e.g. cpu, vulkan).

§llama_cpp_version_cpu: Option<String>

llama.cpp release tag for CPU backend (e.g. “b1234” or None for latest).

§llama_cpp_version_vulkan: Option<String>

llama.cpp release tag for Vulkan backend (e.g. “b1234” or None for latest).

§llama_cpp_version_rocm: Option<String>

llama.cpp release tag for ROCm backend (e.g. “b1234” or None for latest).

§llama_cpp_version_rocm_lemonade: Option<String>

Lemonade llama.cpp release tag for ROCm backend.

§llama_cpp_version_cuda: Option<String>

llama.cpp release tag for CUDA backend.

§api_endpoint_enabled: bool

Whether to enable the API proxy server.

§api_endpoint_port: u16

Port for the API proxy server.

§spec_type: String

Speculative decoding type (e.g., “draft-mtp”, “ngram-simple”, “” for off).

§draft_tokens: u32

Number of draft tokens for MTP.

§tags: Vec<String>

Tags for the model.

§ws_server_enabled: bool

Whether to enable the WebSocket dashboard server.

§ws_server_port: u16

§ws_server_auth_key: Option<String>

§ws_server_tls_enabled: bool

§ws_server_tls_cert: Option<String>

§ws_server_tls_key: Option<String>

Implementations§

Source§

impl ModelSettings

Source

pub fn get_active_backend_version(&self) -> Option<&String>

Get the version string for the currently active backend.

Source

pub fn get_active_backend_version_display(&self) -> &str

Get the display version string for the currently active backend (defaults to “latest”).

Source

pub fn set_active_backend_version(&mut self, tag: Option<String>)

Set the version string for the currently active backend.

Source§

impl ModelSettings

Source

pub fn from_config(config: &Config) -> Self

Create ModelSettings from config defaults, applying model-specific overrides.

Source§

impl ModelSettings

Source

pub fn is_dirty(&self, other: &Self) -> bool

Check whether these settings differ from other in any field. Uses the derived PartialEq, which compares all fields, so coverage of newly added fields is compiler-enforced.

Trait Implementations§

Source§

impl Clone for ModelSettings

Source§

fn clone(&self) -> ModelSettings

Returns a duplicate of the value. Read more
1.0.0 (const: unstable) · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for ModelSettings

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Default for ModelSettings

Source§

fn default() -> Self

Returns the “default value” for a type. Read more
Source§

impl<'de> Deserialize<'de> for ModelSettings

Source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
Source§

impl From<DefaultParams> for ModelSettings

Source§

fn from(dp: DefaultParams) -> Self

Converts to this type from the input type.
Source§

impl PartialEq for ModelSettings

Source§

fn eq(&self, other: &ModelSettings) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 (const: unstable) · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl Serialize for ModelSettings

Source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more
Source§

impl StructuralPartialEq for ModelSettings

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<'a, T, E> AsTaggedExplicit<'a, E> for T
where T: 'a,

Source§

fn explicit(self, class: Class, tag: u32) -> TaggedParser<'a, Explicit, Self, E>

Source§

impl<'a, T, E> AsTaggedImplicit<'a, E> for T
where T: 'a,

Source§

fn implicit( self, class: Class, constructed: bool, tag: u32, ) -> TaggedParser<'a, Implicit, Self, E>

Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> FromRef<T> for T
where T: Clone,

Source§

fn from_ref(input: &T) -> T

Converts to this type from a reference to the input type.
Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

Source§

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

Source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

Source§

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,