// llama_cpp_4/common.rs

1//! exposing common llama cpp structures like `CommonParams`
2pub use llama_cpp_sys_4::common::*;
3
/// Common parameters for processing.
///
/// The values used when a field is not set explicitly come from this type's
/// [`Default`] implementation.
///
/// ## See more
/// <https://github.com/ggerganov/llama.cpp/blob/master/common/common.h#L109>
#[derive(Debug, Clone, PartialEq)]
pub struct CommonParams {
    /// Number of new tokens to predict.
    pub n_predict: i32,

    /// Context size.
    pub n_ctx: i32,

    /// Logical batch size for prompt processing (must be >=32 to use BLAS).
    pub n_batch: i32,

    /// Physical batch size for prompt processing (must be >=32 to use BLAS).
    pub n_ubatch: i32,

    /// Number of tokens to keep from the initial prompt.
    pub n_keep: i32,

    /// Maximum number of chunks to process (-1 = unlimited).
    pub n_chunks: i32,

    /// Number of parallel sequences to decode.
    pub n_parallel: i32,

    /// Number of sequences to decode.
    pub n_sequences: i32,

    /// Group-attention factor.
    pub grp_attn_n: i32,

    /// Group-attention width.
    pub grp_attn_w: i32,

    /// Print the token count every n tokens (-1 = disabled).
    pub n_print: i32,

    /// `RoPE` base frequency.
    pub rope_freq_base: f32,

    /// `RoPE` frequency scaling factor.
    pub rope_freq_scale: f32,

    /// `YaRN` extrapolation mix factor.
    pub yarn_ext_factor: f32,

    /// `YaRN` magnitude scaling factor.
    pub yarn_attn_factor: f32,

    /// `YaRN` low correction dim.
    pub yarn_beta_fast: f32,

    /// `YaRN` high correction dim.
    pub yarn_beta_slow: f32,

    /// `YaRN` original context length.
    pub yarn_orig_ctx: i32,

    /// KV cache defragmentation threshold.
    pub defrag_thold: f32,

    /// Prompt for the model to consume.
    pub prompt: String,
}
69
70impl Default for CommonParams {
71    fn default() -> Self {
72        CommonParams {
73            n_predict: -1,
74            n_ctx: 4096,
75            n_batch: 2048,
76            n_ubatch: 512,
77            n_keep: 0,
78            n_chunks: -1,
79            n_parallel: 1,
80            n_sequences: 1,
81            grp_attn_n: 1,
82            grp_attn_w: 512,
83            n_print: -1,
84            rope_freq_base: 0.0,
85            rope_freq_scale: 0.0,
86            yarn_ext_factor: -1.0,
87            yarn_attn_factor: 1.0,
88            yarn_beta_fast: 32.0,
89            yarn_beta_slow: 1.0,
90            yarn_orig_ctx: 0,
91            defrag_thold: 0.1,
92            prompt: String::new(),
93        }
94    }
95}