pub struct LlamaSampler { /* private fields */ }
A safe wrapper around llama_sampler.
Implementations
impl LlamaSampler
pub fn sample(&mut self, ctx: &LlamaContext<'_>, idx: i32) -> LlamaToken
Samples and accepts a token from the idx-th output of the last evaluation.
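§Example
A minimal sketch of a typical decode-loop step. The helper name next_token is hypothetical, and passing idx = -1 assumes the llama.cpp convention of addressing the logits of the last token in the previous decode call:
use llama_cpp_2::context::LlamaContext;
use llama_cpp_2::sampling::LlamaSampler;
use llama_cpp_2::token::LlamaToken;
// Assumes `ctx` has just evaluated a batch with logits requested.
fn next_token(sampler: &mut LlamaSampler, ctx: &LlamaContext<'_>) -> LlamaToken {
    // idx = -1 addresses the last output of the previous evaluation
    // (assumed llama.cpp convention).
    sampler.sample(ctx, -1)
}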
pub fn apply(&self, data_array: &mut LlamaTokenDataArray)
Applies this sampler to a LlamaTokenDataArray.
pub fn accept(&mut self, token: LlamaToken)
Accepts a token into the sampler, possibly updating the internal state of certain samplers (e.g. grammar, repetition penalties).
pub fn accept_many(
    &mut self,
    tokens: impl IntoIterator<Item = impl Borrow<LlamaToken>>,
)
Accepts several tokens in sequence, possibly updating the internal state of certain samplers (e.g. grammar, repetition penalties).
pub fn with_tokens(
    self,
    tokens: impl IntoIterator<Item = impl Borrow<LlamaToken>>,
) -> Self
Accepts several tokens like Self::accept_many, but consumes and returns the sampler, which is convenient for builder-style construction; see the sketch below.
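§Example
A sketch of builder-style construction; the token values are placeholders, and seeding a penalties sampler with prior context is just one plausible use:
use llama_cpp_2::sampling::LlamaSampler;
use llama_cpp_2::token::LlamaToken;
// Placeholder tokens; in practice these come from tokenizing the prompt.
let prompt = [LlamaToken(10), LlamaToken(20), LlamaToken(30)];
// Count repetitions starting from the existing prompt tokens.
let sampler = LlamaSampler::penalties(64, 1.1, 0.0, 0.0).with_tokens(prompt);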
pub fn try_accept(
    &mut self,
    token: LlamaToken,
) -> Result<(), SamplerAcceptError>
Tries to accept a token, returning an error if the underlying sampler throws.
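§Example
A sketch: greedy holds no internal state, so acceptance is expected to succeed here; a grammar sampler could return an error instead:
use llama_cpp_2::sampling::LlamaSampler;
use llama_cpp_2::token::LlamaToken;
let mut sampler = LlamaSampler::greedy();
// A failure is surfaced as an Err value rather than a panic.
assert!(sampler.try_accept(LlamaToken(0)).is_ok());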
pub fn reset(&mut self)
Resets the internal state of the sampler.
This can be useful when you want to start fresh with a sampler without creating a new instance.
pub fn get_seed(&self) -> u32
Gets the random seed used by this sampler.
Returns:
- For random samplers (dist, mirostat, mirostat_v2): returns their current seed
- For sampler chains: returns the first non-default seed found in reverse order
- For all other samplers: returns 0xFFFFFFFF
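§Example
A sketch, assuming Self::dist reports the seed it was constructed with:
use llama_cpp_2::sampling::LlamaSampler;
let sampler = LlamaSampler::dist(1234);
assert_eq!(sampler.get_seed(), 1234);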
pub fn chain(samplers: impl IntoIterator<Item = Self>, no_perf: bool) -> Self
Combines a list of samplers into a single sampler that applies each component sampler one after another.
If you are using a chain to select a token, the chain should always end with one of
LlamaSampler::greedy, LlamaSampler::dist, LlamaSampler::mirostat, or
LlamaSampler::mirostat_v2.
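§Example
A sketch of a typical selection chain; the parameter values are illustrative, not recommendations. The distribution is shaped by top-k, top-p, and temperature, then a token is drawn by LlamaSampler::dist:
use llama_cpp_2::sampling::LlamaSampler;
let sampler = LlamaSampler::chain([
    LlamaSampler::top_k(40),
    LlamaSampler::top_p(0.95, 1),
    LlamaSampler::temp(0.8),
    // The chain ends in a token-selecting sampler.
    LlamaSampler::dist(1234),
], false); // no_perf = false keeps performance measurement enabled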
pub fn chain_simple(samplers: impl IntoIterator<Item = Self>) -> Self
Same as Self::chain with no_perf = false.
§Example
use llama_cpp_2::token::{
LlamaToken,
data::LlamaTokenData,
data_array::LlamaTokenDataArray
};
use llama_cpp_2::sampling::LlamaSampler;
use llama_cpp_2::llama_backend::LlamaBackend;
let backend = LlamaBackend::init().unwrap();
let mut data_array = LlamaTokenDataArray::new(vec![
LlamaTokenData::new(LlamaToken(0), 0., 0.),
LlamaTokenData::new(LlamaToken(1), 1., 0.),
LlamaTokenData::new(LlamaToken(2), 2., 0.),
], false);
data_array.apply_sampler(&mut LlamaSampler::chain_simple([
LlamaSampler::temp(0.5),
LlamaSampler::greedy(),
]));
assert_eq!(data_array.data[0].logit(), 0.);
assert_eq!(data_array.data[1].logit(), 2.);
assert_eq!(data_array.data[2].logit(), 4.);
assert_eq!(data_array.data.len(), 3);
assert_eq!(data_array.selected_token(), Some(LlamaToken(2)));
pub fn temp(t: f32) -> Self
Updates the logits: l_i' = l_i / t. When t <= 0.0, the maximum logit is kept at its original value and the rest are set to -inf.
§Example:
use llama_cpp_2::token::{
LlamaToken,
data::LlamaTokenData,
data_array::LlamaTokenDataArray
};
use llama_cpp_2::sampling::LlamaSampler;
let mut data_array = LlamaTokenDataArray::new(vec![
LlamaTokenData::new(LlamaToken(0), 0., 0.),
LlamaTokenData::new(LlamaToken(1), 1., 0.),
LlamaTokenData::new(LlamaToken(2), 2., 0.),
], false);
data_array.apply_sampler(&mut LlamaSampler::temp(0.5));
assert_eq!(data_array.data[0].logit(), 0.);
assert_eq!(data_array.data[1].logit(), 2.);
assert_eq!(data_array.data[2].logit(), 4.);
pub fn temp_ext(t: f32, delta: f32, exponent: f32) -> Self
Dynamic temperature implementation (a.k.a. entropy-based sampling) described in the paper https://arxiv.org/abs/2309.02772.
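§Example
A sketch with illustrative values: assuming the llama.cpp behavior, the effective temperature varies within roughly [t - delta, t + delta] depending on the entropy of the candidates, with exponent shaping that mapping:
use llama_cpp_2::sampling::LlamaSampler;
// Base temperature 0.8, dynamic range +/- 0.5, linear mapping (exponent 1.0).
let sampler = LlamaSampler::temp_ext(0.8, 0.5, 1.0);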
pub fn top_k(k: i32) -> Self
Top-K sampling described in the academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
§Example:
use llama_cpp_2::token::{
LlamaToken,
data::LlamaTokenData,
data_array::LlamaTokenDataArray
};
use llama_cpp_2::sampling::LlamaSampler;
let mut data_array = LlamaTokenDataArray::new(vec![
LlamaTokenData::new(LlamaToken(0), 0., 0.),
LlamaTokenData::new(LlamaToken(1), 1., 0.),
LlamaTokenData::new(LlamaToken(2), 2., 0.),
LlamaTokenData::new(LlamaToken(3), 3., 0.),
], false);
data_array.apply_sampler(&mut LlamaSampler::top_k(2));
assert_eq!(data_array.data.len(), 2);
assert_eq!(data_array.data[0].id(), LlamaToken(3));
assert_eq!(data_array.data[1].id(), LlamaToken(2));
pub fn top_n_sigma(n: f32) -> Self
Top-nσ sampling as described in the academic paper “Top-nσ: Not All Logits Are You Need” https://arxiv.org/pdf/2411.07641
This method filters logits by selecting only those within n standard deviations of the mean.
§Parameters
- n: Number of standard deviations from the mean to include in sampling
§Example
use llama_cpp_2::sampling::LlamaSampler;
use llama_cpp_2::token::{
LlamaToken,
data::LlamaTokenData,
data_array::LlamaTokenDataArray
};
let mut data_array = LlamaTokenDataArray::new(vec![
LlamaTokenData::new(LlamaToken(0), 0.0, 0.0),
LlamaTokenData::new(LlamaToken(1), 1.0, 0.0),
LlamaTokenData::new(LlamaToken(2), 2.0, 0.0),
], false);
data_array.apply_sampler(&mut LlamaSampler::top_n_sigma(2.0));
pub fn typical(p: f32, min_keep: usize) -> Self
Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
pub fn top_p(p: f32, min_keep: usize) -> Self
Nucleus sampling described in the academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
pub fn min_p(p: f32, min_keep: usize) -> Self
Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
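§Example
A sketch with illustrative values: min-p keeps only tokens whose probability is at least p times that of the most likely token, and min_keep bounds the number of surviving candidates from below:
use llama_cpp_2::sampling::LlamaSampler;
// Keep tokens at least 5% as likely as the top token; never fewer than 1.
let sampler = LlamaSampler::min_p(0.05, 1);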
pub fn xtc(p: f32, t: f32, min_keep: usize, seed: u32) -> Self
XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
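§Example
A sketch with illustrative values: per the linked PR, with probability p each step XTC removes every candidate whose probability exceeds the threshold t except the least likely of them:
use llama_cpp_2::sampling::LlamaSampler;
// 50% chance per step of trimming tokens above a 0.1 probability threshold.
let sampler = LlamaSampler::xtc(0.5, 0.1, 1, 1234);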
pub fn grammar(
    model: &LlamaModel,
    grammar_str: &str,
    grammar_root: &str,
) -> Result<Self, GrammarError>
Grammar sampler that constrains generation to match a GBNF grammar.
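§Example
A sketch assuming an already-loaded model; the helper name yes_no_sampler is hypothetical:
use llama_cpp_2::model::LlamaModel;
use llama_cpp_2::sampling::LlamaSampler;
// Constrain generation to exactly "yes" or "no".
fn yes_no_sampler(model: &LlamaModel) -> LlamaSampler {
    let gbnf = r#"root ::= "yes" | "no""#;
    LlamaSampler::grammar(model, gbnf, "root").expect("invalid grammar")
}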
pub fn grammar_lazy(
    model: &LlamaModel,
    grammar_str: &str,
    grammar_root: &str,
    trigger_words: impl IntoIterator<Item = impl AsRef<[u8]>>,
    trigger_tokens: &[LlamaToken],
) -> Result<Self, GrammarError>
Lazy grammar sampler, introduced in https://github.com/ggerganov/llama.cpp/pull/9639
This sampler enforces grammar rules only when specific trigger words or tokens are encountered.
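§Example
A sketch assuming an already-loaded model; the <tool_call> convention and the helper name are hypothetical. Free-form text is allowed until the trigger word appears, after which the grammar is enforced:
use llama_cpp_2::model::LlamaModel;
use llama_cpp_2::sampling::LlamaSampler;
fn tool_call_sampler(model: &LlamaModel) -> LlamaSampler {
    let gbnf = r#"root ::= "<tool_call>" [^<]* "</tool_call>""#;
    LlamaSampler::grammar_lazy(model, gbnf, "root", ["<tool_call>"], &[])
        .expect("invalid grammar")
}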
pub fn grammar_lazy_patterns(
    model: &LlamaModel,
    grammar_str: &str,
    grammar_root: &str,
    trigger_patterns: &[String],
    trigger_tokens: &[LlamaToken],
) -> Result<Self, GrammarError>
Lazy grammar sampler using regex trigger patterns.
Trigger patterns are regular expressions matched from the start of the generation output. The grammar sampler will be fed content starting from the first match group.
pub fn dry(
    model: &LlamaModel,
    multiplier: f32,
    base: f32,
    allowed_length: i32,
    penalty_last_n: i32,
    seq_breakers: impl IntoIterator<Item = impl AsRef<[u8]>>,
) -> Self
DRY sampler, designed by p-e-w, as described in https://github.com/oobabooga/text-generation-webui/pull/5677, porting the Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
§Panics
If any string in seq_breakers contains null bytes.
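§Example
A sketch assuming an already-loaded model, using the default values suggested in the original PR; the helper name is hypothetical:
use llama_cpp_2::model::LlamaModel;
use llama_cpp_2::sampling::LlamaSampler;
fn dry_sampler(model: &LlamaModel) -> LlamaSampler {
    // Multiplier 0.8, base 1.75, allowed_length 2, penalize the whole
    // context (-1), with the PR's default sequence breakers.
    LlamaSampler::dry(model, 0.8, 1.75, 2, -1, ["\n", ":", "\"", "*"])
}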
pub fn penalties(
    penalty_last_n: i32,
    penalty_repeat: f32,
    penalty_freq: f32,
    penalty_present: f32,
) -> Self
Penalizes tokens for being present in the context.
§Parameters:
- penalty_last_n: last n tokens to penalize (0 = disable penalty, -1 = context size)
- penalty_repeat: 1.0 = disabled
- penalty_freq: 0.0 = disabled
- penalty_present: 0.0 = disabled
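§Example
A sketch with illustrative values: a classic repetition penalty over the last 64 tokens, with the frequency and presence penalties disabled:
use llama_cpp_2::sampling::LlamaSampler;
let sampler = LlamaSampler::penalties(64, 1.1, 0.0, 0.0);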
pub fn mirostat(n_vocab: i32, seed: u32, tau: f32, eta: f32, m: i32) -> Self
Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
§Parameters:
- n_vocab: LlamaModel::n_vocab
- seed: Seed to initialize random generation with.
- tau: The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
- eta: The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.
- m: The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm.
pub fn mirostat_v2(seed: u32, tau: f32, eta: f32) -> Self
Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
§Parameters:
- seed: Seed to initialize random generation with.
- tau: The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
- eta: The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.
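§Example
A sketch using values commonly seen as defaults in llama.cpp frontends (tau = 5.0, eta = 0.1); like the other selection samplers, this would normally terminate a chain:
use llama_cpp_2::sampling::LlamaSampler;
let sampler = LlamaSampler::mirostat_v2(1234, 5.0, 0.1);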
pub fn greedy() -> Self
Selects the most likely token
§Example:
use llama_cpp_2::token::{
LlamaToken,
data::LlamaTokenData,
data_array::LlamaTokenDataArray
};
use llama_cpp_2::sampling::LlamaSampler;
let mut data_array = LlamaTokenDataArray::new(vec![
LlamaTokenData::new(LlamaToken(0), 0., 0.),
LlamaTokenData::new(LlamaToken(1), 1., 0.),
], false);
data_array.apply_sampler(&mut LlamaSampler::greedy());
assert_eq!(data_array.data.len(), 2);
assert_eq!(data_array.selected_token(), Some(LlamaToken(1)));
pub fn logit_bias(n_vocab: i32, biases: &[LlamaLogitBias]) -> Self
Creates a sampler that applies bias values to specific tokens during sampling.
§Parameters
- n_vocab: LlamaModel::n_vocab
- biases: Slice of LlamaLogitBias values specifying token-bias pairs
§Example
use llama_cpp_2::token::{LlamaToken, logit_bias::LlamaLogitBias};
use llama_cpp_2::sampling::LlamaSampler;
let biases = vec![
LlamaLogitBias::new(LlamaToken(1), 1.5), // Increase probability of token 1
LlamaLogitBias::new(LlamaToken(2), -1.0), // Decrease probability of token 2
];
// Assuming vocab_size of 32000
let sampler = LlamaSampler::logit_bias(32000, &biases);