Struct ModelSettings

Source

pub struct ModelSettings {
    pub context_length: u32,
    pub threads: u32,
    pub threads_batch: u32,
    pub batch_size: u32,
    pub ubatch_size: u32,
    pub parallel: u32,
    pub max_concurrent_predictions: Option<u32>,
    pub uniform_cache: bool,
    pub kv_cache_offload: bool,
    pub cache_type_k: Option<CacheTypeK>,
    pub cache_type_v: Option<CacheTypeV>,
    pub keep: i32,
    pub swa_full: bool,
    pub mlock: bool,
    pub mmap: bool,
    pub numa: NumMode,
    pub system_prompt: String,
    pub system_prompt_preset_name: String,
    pub gpu_layers_mode: GpuLayersMode,
    pub split_mode: SplitMode,
    pub tensor_split: String,
    pub main_gpu: i32,
    pub fit: bool,
    pub lora: Option<PathBuf>,
    pub lora_scaled: Option<(PathBuf, f32)>,
    pub rpc: String,
    pub embedding: bool,
    pub flash_attn: bool,
    pub expert_count: i32,
    pub jinja: bool,
    pub chat_template: Option<String>,
    pub chat_template_kwargs: Option<String>,
    pub seed: i32,
    pub temperature: f32,
    pub top_k: i32,
    pub top_p: f32,
    pub min_p: f32,
    pub typical_p: f32,
    pub mirostat: Mirostat,
    pub mirostat_lr: f32,
    pub mirostat_ent: f32,
    pub ignore_eos: bool,
    pub samplers: Samplers,
    pub repeat_penalty: f32,
    pub repeat_last_n: i32,
    pub presence_penalty: Option<f32>,
    pub frequency_penalty: Option<f32>,
    pub dry_multiplier: f32,
    pub dry_base: f32,
    pub dry_allowed_length: i32,
    pub dry_penalty_last_n: i32,
    pub rope_scaling: RopeScaling,
    pub rope_scale: f32,
    pub rope_freq_base: f32,
    pub rope_freq_scale: f32,
    pub rope_yarn_enabled: bool,
    pub host: String,
    pub port: u16,
    pub timeout: u32,
    pub cache_prompt: bool,
    pub cache_reuse: u32,
    pub webui: bool,
    pub max_tokens: Option<u32>,
    pub cache_type: CacheType,
    pub backend: Backend,
    pub llama_cpp_version_cpu: Option<String>,
    pub llama_cpp_version_vulkan: Option<String>,
    pub llama_cpp_version_rocm: Option<String>,
    pub llama_cpp_version_rocm_lemonade: Option<String>,
    pub llama_cpp_version_cuda: Option<String>,
    pub api_endpoint_enabled: bool,
    pub api_endpoint_port: u16,
    pub spec_type: String,
    pub draft_tokens: u32,
    pub tags: Vec<String>,
    pub ws_server_enabled: bool,
    pub ws_server_port: u16,
    pub ws_server_auth_key: Option<String>,
    pub ws_server_tls_enabled: bool,
    pub ws_server_tls_cert: Option<String>,
    pub ws_server_tls_key: Option<String>,
}

Expand description

Settings for loading a model via llama.cpp server.

Fields§

§context_length: u32

Size of the prompt context.

§threads: u32

Number of CPU threads for generation.

§threads_batch: u32

Number of CPU threads for batch processing.

§batch_size: u32

Logical maximum batch size.

§ubatch_size: u32

Physical maximum batch size (micro-batch).

§parallel: u32

Max concurrent predictions (sequences).

§max_concurrent_predictions: Option<u32>

Max concurrent predictions (requests in flight). None means no --parallel argument is passed.

§uniform_cache: bool

Use uniform (unified) KV cache across all sequences.

§kv_cache_offload: bool

Offload KV cache to system RAM.

§cache_type_k: Option<CacheTypeK>

KV cache data type for K.

§cache_type_v: Option<CacheTypeV>

KV cache data type for V.

§keep: i32

Keep N tokens from the initial prompt.

§swa_full: bool

Use full-size SWA cache.

§mlock: bool

Force system to keep model in RAM.

§mmap: bool

Memory-map the model.

§numa: NumMode

NUMA optimization.

§system_prompt: String

System prompt.

§system_prompt_preset_name: String

Name of the system prompt preset currently selected.

§gpu_layers_mode: GpuLayersMode

GPU layer offloading mode.

§split_mode: SplitMode

Split mode across multiple GPUs.

§tensor_split: String

Fraction of model offloaded to each GPU (comma-separated).

§main_gpu: i32

Main GPU index.

§fit: bool

Whether to adjust arguments to fit device memory.

§lora: Option<PathBuf>

Path to LoRA adapter.

§lora_scaled: Option<(PathBuf, f32)>

Path to LoRA adapter with scale.

§rpc: String

RPC servers.

§embedding: bool

Restrict to embedding use case.

§flash_attn: bool

Enable Flash Attention.

§expert_count: i32

Active experts per token (MoE models, -1 = model default).

§jinja: bool

Use Jinja template engine for chat.

§chat_template: Option<String>

Custom chat template string.

§chat_template_kwargs: Option<String>

JSON string for --chat-template-kwargs (e.g. {"enable_thinking": false}).

§seed: i32

RNG seed (-1 = random).

§temperature: f32

Temperature.

§top_k: i32

Top-k sampling (0 = disabled).

§top_p: f32

Top-p sampling (1.0 = disabled).

§min_p: f32

Minimum probability for a token.

§typical_p: f32

Locally typical sampling parameter p.

§mirostat: Mirostat

Mirostat version (0=off, 1=Mirostat, 2=Mirostat2).

§mirostat_lr: f32

Mirostat learning rate (eta).

§mirostat_ent: f32

Mirostat target entropy (tau).

§ignore_eos: bool

Ignore end-of-stream token.

§samplers: Samplers

Sampler order string.

§repeat_penalty: f32

Penalize repeat sequence of tokens.

§repeat_last_n: i32

Last N tokens to consider for repeat penalty.

§presence_penalty: Option<f32>

Repeat alpha presence penalty.

§frequency_penalty: Option<f32>

Repeat alpha frequency penalty.

§dry_multiplier: f32

DRY sampling multiplier.

§dry_base: f32

DRY sampling base value.

§dry_allowed_length: i32

DRY allowed length.

§dry_penalty_last_n: i32

DRY penalty last N.

§rope_scaling: RopeScaling

RoPE frequency scaling method.

§rope_scale: f32

RoPE context scaling factor.

§rope_freq_base: f32

RoPE base frequency.

§rope_freq_scale: f32

RoPE frequency scaling factor.

§rope_yarn_enabled: bool

Enable Yarn RoPE scaling mode.

§host: String

Host address.

§port: u16

Port.

§timeout: u32

Server timeout in seconds.

§cache_prompt: bool

Whether to enable prompt caching.

§cache_reuse: u32

Min chunk size for cache reuse.

§webui: bool

Whether to enable WebUI.

§max_tokens: Option<u32>

Max tokens to predict.

§cache_type: CacheType

Cache type (legacy, kept for compatibility).

§backend: Backend

Backend (e.g. cpu, vulkan, rocm, cuda).

§llama_cpp_version_cpu: Option<String>

llama.cpp release tag for CPU backend (e.g. “b1234” or None for latest).

§llama_cpp_version_vulkan: Option<String>

llama.cpp release tag for Vulkan backend (e.g. “b1234” or None for latest).

§llama_cpp_version_rocm: Option<String>

llama.cpp release tag for ROCm backend (e.g. “b1234” or None for latest).

§llama_cpp_version_rocm_lemonade: Option<String>

Lemonade llama.cpp release tag for ROCm backend.

§llama_cpp_version_cuda: Option<String>

llama.cpp release tag for CUDA backend.

§api_endpoint_enabled: bool

Whether to enable the API proxy server.

§api_endpoint_port: u16

Port for the API proxy server.

§spec_type: String

Speculative decoding type (e.g., “draft-mtp”, “ngram-simple”, “” for off).

§draft_tokens: u32

Number of draft tokens for MTP.

§tags: Vec<String>

Tags for the model.

§ws_server_enabled: bool

Whether to enable the WebSocket dashboard server.

§ws_server_port: u16

§ws_server_auth_key: Option<String>

§ws_server_tls_enabled: bool

§ws_server_tls_cert: Option<String>

§ws_server_tls_key: Option<String>

Implementations§

Source §

impl ModelSettings

Source

pub fn get_active_backend_version(&self) -> Option<&String>

Get the version string for the currently active backend.

Source

pub fn get_active_backend_version_display(&self) -> &str

Get the display version string for the currently active backend (defaults to “latest”).

Source

pub fn set_active_backend_version(&mut self, tag: Option<String>)

Set the version string for the currently active backend.

Source §

impl ModelSettings

Source

pub fn from_config(config: &Config) -> Self

Create ModelSettings from config defaults, applying model-specific overrides.

Source §

impl ModelSettings

Source

pub fn is_dirty(&self, other: &Self) -> bool

Check whether these settings differ from other in any field. Uses the derived PartialEq, which compares all fields — compiler-enforced.

Trait Implementations§

Source §

impl Clone for ModelSettings

Source §

fn clone(&self) -> ModelSettings

Returns a duplicate of the value. Read more

1.0.0 (const: unstable) · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl Debug for ModelSettings

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Source §

impl Default for ModelSettings

Source §

fn default() -> Self

Returns the “default value” for a type. Read more

Source §

impl<'de> Deserialize<'de> for ModelSettings

Source §

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more

Source §

impl From<DefaultParams> for ModelSettings

Source §

fn from(dp: DefaultParams) -> Self

Converts to this type from the input type.

Source §

impl PartialEq for ModelSettings

Source §

fn eq(&self, other: &ModelSettings) -> bool

Tests for self and other values to be equal, and is used by ==.

1.0.0 (const: unstable) · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.

Source §

impl Serialize for ModelSettings

Source §

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

Serialize this value into the given Serde serializer. Read more

Source §

impl StructuralPartialEq for ModelSettings

Auto Trait Implementations§

§

impl UnwindSafe for ModelSettings

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<'a, T, E> AsTaggedExplicit<'a, E> for T
where T: 'a,

Source §

fn explicit(self, class: Class, tag: u32) -> TaggedParser<'a, Explicit, Self, E>

Source §

impl<'a, T, E> AsTaggedImplicit<'a, E> for T
where T: 'a,

Source §

fn implicit( self, class: Class, constructed: bool, tag: u32, ) -> TaggedParser<'a, Implicit, Self, E>

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> FromRef<T> for T
where T: Clone,

Source §

fn from_ref(input: &T) -> T

Converts to this type from a reference to the input type.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> IntoEither for T

Source §

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §