//! Manual wrapper for values in llama.cpp/common/common.h
//! (llama_cpp_sys_4/common.rs)

use crate::LLAMA_DEFAULT_SEED;
// Bindgen-style mirror of the C `enum common_sampler_type` from
// llama.cpp/common/common.h: a plain integer type alias plus one constant
// per variant, keeping the numeric values identical to the C side.

/// No sampler stage.
pub const COMMON_SAMPLER_TYPE_NONE: common_sampler_type = 0;
/// DRY repetition-penalty stage (see `dry_*` fields of `common_sampler_params`).
pub const COMMON_SAMPLER_TYPE_DRY: common_sampler_type = 1;
/// Top-K truncation stage.
pub const COMMON_SAMPLER_TYPE_TOP_K: common_sampler_type = 2;
/// Top-P truncation stage.
pub const COMMON_SAMPLER_TYPE_TOP_P: common_sampler_type = 3;
/// Min-P truncation stage.
pub const COMMON_SAMPLER_TYPE_MIN_P: common_sampler_type = 4;
/// Tail-free sampling stage (parameter `tfs_z`).
pub const COMMON_SAMPLER_TYPE_TFS_Z: common_sampler_type = 5;
/// Typical-P sampling stage.
pub const COMMON_SAMPLER_TYPE_TYPICAL_P: common_sampler_type = 6;
/// Temperature-scaling stage.
pub const COMMON_SAMPLER_TYPE_TEMPERATURE: common_sampler_type = 7;
/// XTC sampling stage (see `xtc_probability` / `xtc_threshold`).
pub const COMMON_SAMPLER_TYPE_XTC: common_sampler_type = 8;
/// Infill-specific sampling stage.
pub const COMMON_SAMPLER_TYPE_INFILL: common_sampler_type = 9;
/// ABI-compatible representation of the C enum: an unsigned C `int`.
pub type common_sampler_type = ::core::ffi::c_uint;
16
17/// common sampler params
18#[repr(C)]
19#[derive(Debug, PartialEq)]
20pub struct common_sampler_params {
21    /// the seed used to initialize `llama_sampler`
22    pub seed: u32,
23    /// number of previous tokens to remember
24    pub n_prev: i32,
25    /// if greater than 0, output the probabilities of top `n_probs` tokens.
26    pub n_probs: i32,
27    /// 0 = disabled, otherwise samplers should return at least `min_keep` tokens
28    pub min_keep: i32,
29    /// <= 0 to use vocab size
30    pub top_k: i32,
31    /// 1.0 = disabled
32    pub top_p: f32,
33    /// 0.0 = disabled
34    pub min_p: f32,
35    /// 0.0 = disabled
36    pub xtc_probability: f32,
37    /// > 0.5 disables XTC
38    pub xtc_threshold: f32,
39    /// 1.0 = disabled
40    pub tfs_z: f32,
41    /// typical_p, 1.0 = disabled
42    pub typ_p: f32,
43    /// <= 0.0 to sample greedily, 0.0 to not output probabilities
44    pub temp: f32,
45    /// 0.0 = disabled
46    pub dynatemp_range: f32,
47    /// controls how entropy maps to temperature in dynamic temperature sampler
48    pub dynatemp_exponent: f32,
49    /// last n tokens to penalize (0 = disable penalty, -1 = context size)
50    pub penalty_last_n: i32,
51    /// 1.0 = disabled
52    pub penalty_repeat: f32,
53    /// 0.0 = disabled
54    pub penalty_freq: f32,
55    /// 0.0 = disabled
56    pub penalty_present: f32,
57    /// 0.0 = disabled;      DRY repetition penalty for tokens extending repetition:
58    pub dry_multiplier: f32,
59    /// 0.0 = disabled;      multiplier * base ^ (length of sequence before token - allowed length)
60    pub dry_base: f32,
61    /// tokens extending repetitions beyond this receive penalty
62    pub dry_allowed_length: i32,
63    /// how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
64    pub dry_penalty_last_n: i32,
65    /// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
66    pub mirostat: i32,
67    /// target entropy
68    pub mirostat_tau: f32,
69    /// learning rate
70    pub mirostat_eta: f32,
71    /// consider newlines as a repeatable token
72    pub penalize_nl: bool,
73    pub ignore_eos: bool,
74    /// disable performance metrics
75    pub no_perf: bool,
76    pub dry_sequence_breakers: Vec<String>,
77    pub samplers: Vec<common_sampler_type>,
78    pub grammar: Vec<String>,
79    pub logit_bias: Vec<(i32, f64)>,
80}
81
82impl Default for common_sampler_params {
83    fn default() -> Self {
84        Self {
85            seed: LLAMA_DEFAULT_SEED, // the seed used to initialize llama_sampler
86            n_prev: 64,               // number of previous tokens to remember
87            n_probs: 0, // if greater than 0, output the probabilities of top n_probs tokens.
88            min_keep: 0, // 0 = disabled, otherwise samplers should return at least min_keep tokens
89            top_k: 40,  // <= 0 to use vocab size
90            top_p: 0.95, // 1.0 = disabled
91            min_p: 0.05, // 0.0 = disabled
92            xtc_probability: 0.00, // 0.0 = disabled
93            xtc_threshold: 0.10, // > 0.5 disables XTC
94            tfs_z: 1.00, // 1.0 = disabled
95            typ_p: 1.00, // typical_p, 1.0 = disabled
96            temp: 0.80, // <= 0.0 to sample greedily, 0.0 to not output probabilities
97            dynatemp_range: 0.00, // 0.0 = disabled
98            dynatemp_exponent: 1.00, // controls how entropy maps to temperature in dynamic temperature sampler
99            penalty_last_n: 64, // last n tokens to penalize (0 = disable penalty, -1 = context size)
100            penalty_repeat: 1.00, // 1.0 = disabled
101            penalty_freq: 0.00, // 0.0 = disabled
102            penalty_present: 0.00, // 0.0 = disabled
103            dry_multiplier: 0.0, // 0.0 = disabled;      DRY repetition penalty for tokens extending repetition:
104            dry_base: 1.75, // 0.0 = disabled;      multiplier * base ^ (length of sequence before token - allowed length)
105            dry_allowed_length: 2, // tokens extending repetitions beyond this receive penalty
106            dry_penalty_last_n: -1, // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
107            mirostat: 0,            // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
108            mirostat_tau: 5.00,     // target entropy
109            mirostat_eta: 0.10,     // learning rate
110            penalize_nl: false,     // consider newlines as a repeatable token
111            ignore_eos: false,
112            no_perf: false, // disable performance metrics
113
114            dry_sequence_breakers: vec!["\n".into(), ":".into(), "\"".into(), "*".into()], // default sequence breakers for DRY
115
116            samplers: vec![
117                COMMON_SAMPLER_TYPE_DRY,
118                COMMON_SAMPLER_TYPE_TOP_K,
119                COMMON_SAMPLER_TYPE_TFS_Z,
120                COMMON_SAMPLER_TYPE_TYPICAL_P,
121                COMMON_SAMPLER_TYPE_TOP_P,
122                COMMON_SAMPLER_TYPE_MIN_P,
123                COMMON_SAMPLER_TYPE_XTC,
124                COMMON_SAMPLER_TYPE_TEMPERATURE,
125            ],
126
127            grammar: vec![], // optional BNF-like grammar to constrain sampling
128
129            logit_bias: vec![], // logit biases to apply
130        }
131    }
132}