#[repr(C)]
pub struct common_params {
pub n_predict: i32,
pub n_ctx: i32,
pub n_batch: i32,
pub n_ubatch: i32,
pub n_keep: i32,
pub n_draft: i32,
pub n_chunks: i32,
pub n_parallel: i32,
pub n_sequences: i32,
pub p_split: f32,
pub n_gpu_layers: i32,
pub n_gpu_layers_draft: i32,
pub main_gpu: i32,
pub grp_attn_n: i32,
pub grp_attn_w: i32,
pub n_print: i32,
pub rope_freq_base: f32,
pub rope_freq_scale: f32,
pub yarn_ext_factor: f32,
pub yarn_attn_factor: f32,
pub yarn_beta_fast: f32,
pub yarn_beta_slow: f32,
pub yarn_orig_ctx: i32,
pub defrag_thold: f32,
pub numa: ggml_numa_strategy,
pub split_mode: llama_split_mode,
pub rope_scaling_type: llama_rope_scaling_type,
pub pooling_type: llama_pooling_type,
pub attention_type: llama_attention_type,
pub sparams: common_sampler_params,
}

Fields
n_predict: i32 - new tokens to predict
n_ctx: i32 - context size
n_batch: i32 - logical batch size for prompt processing (must be >=32 to use BLAS)
n_ubatch: i32 - physical batch size for prompt processing (must be >=32 to use BLAS)
n_keep: i32 - number of tokens to keep from initial prompt
n_draft: i32 - number of tokens to draft during speculative decoding
n_chunks: i32 - max number of chunks to process (-1 = unlimited)
n_parallel: i32 - number of parallel sequences to decode
n_sequences: i32 - number of sequences to decode
p_split: f32 - speculative decoding split probability
n_gpu_layers: i32 - number of layers to store in VRAM (-1 = use default)
n_gpu_layers_draft: i32 - number of layers to store in VRAM for the draft model (-1 = use default)
main_gpu: i32 - the GPU that is used for scratch and small tensors
grp_attn_n: i32 - group-attention factor
grp_attn_w: i32 - group-attention width
n_print: i32 - print token count every n tokens (-1 = disabled)
rope_freq_base: f32 - RoPE base frequency
rope_freq_scale: f32 - RoPE frequency scaling factor (see the sketch after this list)
yarn_ext_factor: f32 - YaRN extrapolation mix factor
yarn_attn_factor: f32 - YaRN magnitude scaling factor
yarn_beta_fast: f32 - YaRN low correction dim
yarn_beta_slow: f32 - YaRN high correction dim
yarn_orig_ctx: i32 - YaRN original context length
defrag_thold: f32 - KV cache defragmentation threshold
numa: ggml_numa_strategy
split_mode: llama_split_mode - how split tensors should be distributed across GPUs
rope_scaling_type: llama_rope_scaling_type
pooling_type: llama_pooling_type
attention_type: llama_attention_type
sparams: common_sampler_params
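For intuition about rope_freq_base and rope_freq_scale, here is a hedged sketch of the conventional linearly scaled RoPE angle computation. This is not this crate's code, and the rope_angles helper is an illustrative assumption:

// Illustrative only: conventional linear RoPE scaling multiplies the token
// position by rope_freq_scale, while rope_freq_base sets the per-dimension
// frequency decay: angle_i = (pos * freq_scale) / freq_base^(2i / head_dim).
fn rope_angles(pos: f32, head_dim: usize, freq_base: f32, freq_scale: f32) -> Vec<f32> {
    (0..head_dim / 2)
        .map(|i| pos * freq_scale * freq_base.powf(-2.0 * i as f32 / head_dim as f32))
        .collect()
}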
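And a minimal construction sketch, assuming no safer constructor is exposed (check for a Default impl or a default-params binding in the crate first). Every value set below is illustrative rather than a library default, and the zero-initialization assumes all fields, including the nested sparams struct, are integers, floats, or i32-backed C enums for which the zero variant is valid:

use std::mem::MaybeUninit;

fn make_params() -> common_params {
    // SAFETY: assumes an all-zero bit pattern is valid for every field,
    // i.e. the C enums backing numa, split_mode, etc. define a 0 variant.
    let mut p: common_params = unsafe { MaybeUninit::zeroed().assume_init() };
    p.n_predict = 128;   // illustrative: generate up to 128 new tokens
    p.n_ctx = 4096;      // illustrative: 4k-token context window
    p.n_batch = 2048;    // logical batch size; >=32 required to use BLAS
    p.n_ubatch = 512;    // physical batch size; >=32 required to use BLAS
    p.n_chunks = -1;     // -1 = unlimited chunks
    p.n_gpu_layers = -1; // -1 = backend default layer offload
    p.n_print = -1;      // -1 = never print token counts
    p
}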