Expand description

Create a sampler struct to encapsulate the sampling process. This allows all of the sampling parameters to be passed around as a single struct, and also allows late binding of expensive context, such as crate::context::LlamaContext or the token history, to the sampler.

§Example

Llama.cpp default sampler

use llama_cpp_2::context::sample::sampler::{Sampler, SampleStep};
use llama_cpp_2::token::data::LlamaTokenData;
use llama_cpp_2::token::data_array::LlamaTokenDataArray;
use llama_cpp_2::token::LlamaToken;

// Finalizer: the last stage of the pipeline. It receives the candidates that
// survived every step, greedily takes the first one after the softmax pass,
// records its id in the history, and returns it as the sampled token.
let finalizer = &|mut candidates: LlamaTokenDataArray, history: &mut Vec<LlamaToken>| {
    candidates.sample_softmax(None);
    let token = candidates.data[0];
    history.push(token.id());
    vec![token]
};

// The token history doubles as the sampler's context: steps may read it and
// the finalizer appends to it.
let mut history: Vec<LlamaToken> = Vec::new();
let mut sampler = Sampler::new(finalizer);

// Steps run in the order they are pushed; only the repetition penalty looks at
// the history, the remaining steps ignore the context argument.
sampler.push_step(&|c, history| c.sample_repetition_penalty(None, history, 64, 1.1, 0.0, 0.0));
sampler.push_step(&|c, _| c.sample_top_k(None, 40, 1));
sampler.push_step(&|c, _| c.sample_tail_free(None, 1.0, 1));
sampler.push_step(&|c, _| c.sample_typical(None, 1.0, 1));
sampler.push_step(&|c, _| c.sample_top_p(None, 0.95, 1));
sampler.push_step(&|c, _| c.sample_min_p(None, 0.05, 1));
sampler.push_step(&|c, _| c.sample_temp(None, 0.5));

// Four dummy candidates (token ids 0..4) with deterministic, increasing values.
let candidates = LlamaTokenDataArray::from_iter(
    (0..4).map(|i| LlamaTokenData::new(LlamaToken::new(i), i as f32 / 6.0, 0.0)),
    false,
);

// Each call consumes a fresh copy of the candidates and yields exactly one token.
for _ in 0..10 {
    let tokens = sampler.sample(&mut history, candidates.clone());
    assert_eq!(tokens.len(), 1);
}

// The finalizer pushed one token id per sample.
assert_eq!(history.len(), 10);

Structs§

  • Sampler: A series of sampling steps that will produce a vector of token data.

Type Aliases§

  • SampleFinalizer: The final step to select tokens from the remaining candidates.
  • SampleStep: A single step to sample tokens from the remaining candidates.