// llama_cpp_sys_4/common.rs
1//! Manual wrapper for values in llama.cpp/common/common.h
2
3use crate::LLAMA_DEFAULT_SEED;
4
/// Discriminant for the sampler stages of llama.cpp's common sampling chain
/// (mirrors `enum common_sampler_type` in common/common.h).
pub type common_sampler_type = ::core::ffi::c_uint;

/// No sampler selected.
pub const COMMON_SAMPLER_TYPE_NONE: common_sampler_type = 0;
/// DRY ("don't repeat yourself") repetition penalty.
pub const COMMON_SAMPLER_TYPE_DRY: common_sampler_type = 1;
/// Top-K truncation.
pub const COMMON_SAMPLER_TYPE_TOP_K: common_sampler_type = 2;
/// Top-P (nucleus) truncation.
pub const COMMON_SAMPLER_TYPE_TOP_P: common_sampler_type = 3;
/// Min-P truncation.
pub const COMMON_SAMPLER_TYPE_MIN_P: common_sampler_type = 4;
/// Tail-free sampling (z parameter).
pub const COMMON_SAMPLER_TYPE_TFS_Z: common_sampler_type = 5;
/// Typical-P (locally typical) sampling.
pub const COMMON_SAMPLER_TYPE_TYPICAL_P: common_sampler_type = 6;
/// Temperature scaling.
pub const COMMON_SAMPLER_TYPE_TEMPERATURE: common_sampler_type = 7;
/// XTC (exclude-top-choices) sampling.
pub const COMMON_SAMPLER_TYPE_XTC: common_sampler_type = 8;
/// Infill-mode sampling.
pub const COMMON_SAMPLER_TYPE_INFILL: common_sampler_type = 9;
16
17/// common sampler params
18#[repr(C)]
19#[derive(Debug, PartialEq)]
20pub struct common_sampler_params {
21 /// the seed used to initialize `llama_sampler`
22 pub seed: u32,
23 /// number of previous tokens to remember
24 pub n_prev: i32,
25 /// if greater than 0, output the probabilities of top `n_probs` tokens.
26 pub n_probs: i32,
27 /// 0 = disabled, otherwise samplers should return at least `min_keep` tokens
28 pub min_keep: i32,
29 /// <= 0 to use vocab size
30 pub top_k: i32,
31 /// 1.0 = disabled
32 pub top_p: f32,
33 /// 0.0 = disabled
34 pub min_p: f32,
35 /// 0.0 = disabled
36 pub xtc_probability: f32,
37 /// > 0.5 disables XTC
38 pub xtc_threshold: f32,
39 /// 1.0 = disabled
40 pub tfs_z: f32,
41 /// typical_p, 1.0 = disabled
42 pub typ_p: f32,
43 /// <= 0.0 to sample greedily, 0.0 to not output probabilities
44 pub temp: f32,
45 /// 0.0 = disabled
46 pub dynatemp_range: f32,
47 /// controls how entropy maps to temperature in dynamic temperature sampler
48 pub dynatemp_exponent: f32,
49 /// last n tokens to penalize (0 = disable penalty, -1 = context size)
50 pub penalty_last_n: i32,
51 /// 1.0 = disabled
52 pub penalty_repeat: f32,
53 /// 0.0 = disabled
54 pub penalty_freq: f32,
55 /// 0.0 = disabled
56 pub penalty_present: f32,
57 /// 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
58 pub dry_multiplier: f32,
59 /// 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
60 pub dry_base: f32,
61 /// tokens extending repetitions beyond this receive penalty
62 pub dry_allowed_length: i32,
63 /// how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
64 pub dry_penalty_last_n: i32,
65 /// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
66 pub mirostat: i32,
67 /// target entropy
68 pub mirostat_tau: f32,
69 /// learning rate
70 pub mirostat_eta: f32,
71 /// consider newlines as a repeatable token
72 pub penalize_nl: bool,
73 pub ignore_eos: bool,
74 /// disable performance metrics
75 pub no_perf: bool,
76 pub dry_sequence_breakers: Vec<String>,
77 pub samplers: Vec<common_sampler_type>,
78 pub grammar: Vec<String>,
79 pub logit_bias: Vec<(i32, f64)>,
80}
81
82impl Default for common_sampler_params {
83 fn default() -> Self {
84 Self {
85 seed: LLAMA_DEFAULT_SEED, // the seed used to initialize llama_sampler
86 n_prev: 64, // number of previous tokens to remember
87 n_probs: 0, // if greater than 0, output the probabilities of top n_probs tokens.
88 min_keep: 0, // 0 = disabled, otherwise samplers should return at least min_keep tokens
89 top_k: 40, // <= 0 to use vocab size
90 top_p: 0.95, // 1.0 = disabled
91 min_p: 0.05, // 0.0 = disabled
92 xtc_probability: 0.00, // 0.0 = disabled
93 xtc_threshold: 0.10, // > 0.5 disables XTC
94 tfs_z: 1.00, // 1.0 = disabled
95 typ_p: 1.00, // typical_p, 1.0 = disabled
96 temp: 0.80, // <= 0.0 to sample greedily, 0.0 to not output probabilities
97 dynatemp_range: 0.00, // 0.0 = disabled
98 dynatemp_exponent: 1.00, // controls how entropy maps to temperature in dynamic temperature sampler
99 penalty_last_n: 64, // last n tokens to penalize (0 = disable penalty, -1 = context size)
100 penalty_repeat: 1.00, // 1.0 = disabled
101 penalty_freq: 0.00, // 0.0 = disabled
102 penalty_present: 0.00, // 0.0 = disabled
103 dry_multiplier: 0.0, // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
104 dry_base: 1.75, // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
105 dry_allowed_length: 2, // tokens extending repetitions beyond this receive penalty
106 dry_penalty_last_n: -1, // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
107 mirostat: 0, // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
108 mirostat_tau: 5.00, // target entropy
109 mirostat_eta: 0.10, // learning rate
110 penalize_nl: false, // consider newlines as a repeatable token
111 ignore_eos: false,
112 no_perf: false, // disable performance metrics
113
114 dry_sequence_breakers: vec!["\n".into(), ":".into(), "\"".into(), "*".into()], // default sequence breakers for DRY
115
116 samplers: vec![
117 COMMON_SAMPLER_TYPE_DRY,
118 COMMON_SAMPLER_TYPE_TOP_K,
119 COMMON_SAMPLER_TYPE_TFS_Z,
120 COMMON_SAMPLER_TYPE_TYPICAL_P,
121 COMMON_SAMPLER_TYPE_TOP_P,
122 COMMON_SAMPLER_TYPE_MIN_P,
123 COMMON_SAMPLER_TYPE_XTC,
124 COMMON_SAMPLER_TYPE_TEMPERATURE,
125 ],
126
127 grammar: vec![], // optional BNF-like grammar to constrain sampling
128
129 logit_bias: vec![], // logit biases to apply
130 }
131 }
132}