Struct llama_cpp_2::token::data_array::LlamaTokenDataArray
pub struct LlamaTokenDataArray {
pub data: Vec<LlamaTokenData>,
pub sorted: bool,
}
A safe wrapper around llama_token_data_array.
Fields

data: Vec<LlamaTokenData>
The underlying data.

sorted: bool
Is the data sorted?
Implementations

impl LlamaTokenDataArray

pub fn new(data: Vec<LlamaTokenData>, sorted: bool) -> Self
Create a new LlamaTokenDataArray from a vector and whether or not the data is sorted.
use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let array = LlamaTokenDataArray::new(vec![
    LlamaTokenData::new(LlamaToken(0), 0.0, 0.0),
    LlamaTokenData::new(LlamaToken(1), 0.1, 0.1),
], false);
assert_eq!(array.data.len(), 2);
assert_eq!(array.sorted, false);
pub fn from_iter<T>(data: T, sorted: bool) -> LlamaTokenDataArray
where
    T: IntoIterator<Item = LlamaTokenData>,
Create a new LlamaTokenDataArray from an iterator and whether or not the data is sorted.
use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let array = LlamaTokenDataArray::from_iter([
    LlamaTokenData::new(LlamaToken(0), 0.0, 0.0),
    LlamaTokenData::new(LlamaToken(1), 0.1, 0.1),
], false);
assert_eq!(array.data.len(), 2);
assert_eq!(array.sorted, false);
impl LlamaTokenDataArray

pub fn sample_repetition_penalty(
    &mut self,
    ctx: Option<&mut LlamaContext<'_>>,
    last_tokens: &[LlamaToken],
    penalty_last_n: usize,
    penalty_repeat: f32,
    penalty_freq: f32,
    penalty_present: f32,
)
Repetition penalty described in CTRL academic paper, with negative logit fix. Frequency and presence penalties described in OpenAI API.
Parameters

- ctx - the context to use. May be None if you do not care to record the sample timings.
- last_tokens - the last tokens in the context.
- penalty_last_n - the number of tokens back to consider for the repetition penalty. (0 for no penalty)
- penalty_repeat - the repetition penalty. (1.0 for no penalty)
- penalty_freq - the frequency penalty. (0.0 for no penalty)
- penalty_present - the presence penalty. (0.0 for no penalty)
Example

use std::collections::BTreeMap;
use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let history = vec![
    LlamaToken::new(2),
    LlamaToken::new(1),
    LlamaToken::new(0),
];
let candidates = vec![
    LlamaToken::new(0),
    LlamaToken::new(1),
    LlamaToken::new(2),
    LlamaToken::new(3),
];
let mut candidates = LlamaTokenDataArray::from_iter(
    candidates.iter().map(|&token| LlamaTokenData::new(token, 0.0, 0.0)),
    false,
);
candidates.sample_repetition_penalty(None, &history, 2, 1.1, 0.1, 0.1);
let token_logits = candidates
    .data
    .into_iter()
    .map(|token_data| (token_data.id(), token_data.logit()))
    .collect::<BTreeMap<_, _>>();
assert_eq!(token_logits[&LlamaToken(0)], 0.0, "expected no penalty as it is out of `penalty_last_n`");
assert!(token_logits[&LlamaToken(1)] < 0.0, "expected penalty as it is in `penalty_last_n`");
assert!(token_logits[&LlamaToken(2)] < 0.0, "expected penalty as it is in `penalty_last_n`");
assert_eq!(token_logits[&LlamaToken(3)], 0.0, "expected no penalty as it is not in `history`");
pub fn sample_softmax(&mut self, ctx: Option<&mut LlamaContext<'_>>)
Sorts candidate tokens by their logits in descending order and calculates probabilities based on the logits.
Example

use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let lowest = LlamaTokenData::new(LlamaToken::new(0), 0.1, 0.0);
let middle = LlamaTokenData::new(LlamaToken::new(1), 0.2, 0.0);
let highest = LlamaTokenData::new(LlamaToken::new(2), 0.7, 0.0);
let candidates = vec![lowest, middle, highest];
let mut candidates = LlamaTokenDataArray::from_iter(candidates, false);
candidates.sample_softmax(None);
assert!(candidates.sorted);
assert_eq!(candidates.data[0].id(), highest.id());
assert_eq!(candidates.data[0].logit(), highest.logit());
assert!(candidates.data[0].p() > candidates.data[1].p());
assert_eq!(candidates.data[1].id(), middle.id());
assert_eq!(candidates.data[1].logit(), middle.logit());
assert!(candidates.data[1].p() > candidates.data[2].p());
assert_eq!(candidates.data[2].id(), lowest.id());
assert_eq!(candidates.data[2].logit(), lowest.logit());
pub fn sample_temp(&mut self, ctx: Option<&mut LlamaContext<'_>>, temperature: f32)
Modify the logits of Self in place using temperature sampling.
Example

use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let candidates = vec![
    LlamaTokenData::new(LlamaToken::new(0), 0.1, 0.0),
    LlamaTokenData::new(LlamaToken::new(1), 0.2, 0.0),
    LlamaTokenData::new(LlamaToken::new(2), 0.7, 0.0),
];
let mut candidates = LlamaTokenDataArray::from_iter(candidates, false);
candidates.sample_temp(None, 0.5);
assert_ne!(candidates.data[0].logit(), 0.1);
assert_ne!(candidates.data[1].logit(), 0.2);
assert_ne!(candidates.data[2].logit(), 0.7);
pub fn sample_token(&mut self, ctx: &mut LlamaContext<'_>) -> LlamaToken
Randomly selects a token from the candidates based on their probabilities.
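Example

A minimal sketch of a typical use: filter the candidates first, then draw a token. This assumes ctx is a valid, already-initialized LlamaContext obtained elsewhere from a loaded model, and the filter values (40, 0.95, 0.8) are illustrative only.

use llama_cpp_2::context::LlamaContext;
use llama_cpp_2::token::{LlamaToken, data_array::LlamaTokenDataArray};

// Hypothetical helper, not part of the crate; shown only to illustrate
// where sample_token fits after the filtering passes.
fn pick_token(ctx: &mut LlamaContext<'_>, mut candidates: LlamaTokenDataArray) -> LlamaToken {
    candidates.sample_top_k(Some(&mut *ctx), 40, 1);   // keep the 40 highest-logit tokens
    candidates.sample_top_p(Some(&mut *ctx), 0.95, 1); // nucleus filtering
    candidates.sample_temp(Some(&mut *ctx), 0.8);      // temperature scaling
    candidates.sample_token(ctx)                       // draw from the resulting distribution
}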
pub fn sample_top_k(&mut self, ctx: Option<&mut LlamaContext<'_>>, k: i32, min_keep: usize)
Top-K sampling described in academic paper The Curious Case of Neural Text Degeneration
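Example

A minimal sketch, assuming (as in llama.cpp's top-k implementation) that the call truncates the candidate list to the k highest-logit tokens, subject to min_keep:

use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let candidates = vec![
    LlamaTokenData::new(LlamaToken::new(0), 0.1, 0.0),
    LlamaTokenData::new(LlamaToken::new(1), 0.2, 0.0),
    LlamaTokenData::new(LlamaToken::new(2), 0.7, 0.0),
];
let mut candidates = LlamaTokenDataArray::from_iter(candidates, false);
candidates.sample_top_k(None, 2, 1);
// Only the two highest-logit tokens should remain.
assert_eq!(candidates.data.len(), 2);
assert_eq!(candidates.data[0].id(), LlamaToken::new(2));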
pub fn sample_tail_free(&mut self, ctx: Option<&mut LlamaContext<'_>>, z: f32, min_keep: usize)
Tail Free Sampling described in Tail-Free-Sampling.
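Example

A minimal sketch; the z and min_keep values are illustrative (in llama.cpp, z = 1.0 disables the filter):

use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let candidates = vec![
    LlamaTokenData::new(LlamaToken::new(0), 0.1, 0.0),
    LlamaTokenData::new(LlamaToken::new(1), 0.2, 0.0),
    LlamaTokenData::new(LlamaToken::new(2), 0.7, 0.0),
];
let mut candidates = LlamaTokenDataArray::from_iter(candidates, false);
candidates.sample_tail_free(None, 0.5, 1);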
pub fn sample_typical(&mut self, ctx: Option<&mut LlamaContext<'_>>, p: f32, min_keep: usize)
Locally Typical Sampling implementation described in the paper.
Example

use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let candidates = vec![
    LlamaTokenData::new(LlamaToken::new(0), 0.1, 0.0),
    LlamaTokenData::new(LlamaToken::new(1), 0.2, 0.0),
    LlamaTokenData::new(LlamaToken::new(2), 0.7, 0.0),
];
let mut candidates = LlamaTokenDataArray::from_iter(candidates, false);
candidates.sample_typical(None, 0.5, 1);
pub fn sample_top_p(&mut self, ctx: Option<&mut LlamaContext<'_>>, p: f32, min_keep: usize)
Nucleus sampling described in academic paper The Curious Case of Neural Text Degeneration
Example

use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let candidates = vec![
    LlamaTokenData::new(LlamaToken::new(0), 0.1, 0.0),
    LlamaTokenData::new(LlamaToken::new(1), 0.2, 0.0),
    LlamaTokenData::new(LlamaToken::new(2), 0.7, 0.0),
];
let mut candidates = LlamaTokenDataArray::from_iter(candidates, false);
candidates.sample_top_p(None, 0.5, 1);
assert_eq!(candidates.data.len(), 2);
assert_eq!(candidates.data[0].id(), LlamaToken::new(2));
assert_eq!(candidates.data[1].id(), LlamaToken::new(1));
pub fn sample_min_p(&mut self, ctx: Option<&mut LlamaContext<'_>>, p: f32, min_keep: usize)
Minimum P sampling as described in #3841
Example

use llama_cpp_2::token::{LlamaToken, data::LlamaTokenData, data_array::LlamaTokenDataArray};

let candidates = vec![
    LlamaTokenData::new(LlamaToken::new(4), 0.0001, 0.0),
    LlamaTokenData::new(LlamaToken::new(0), 0.1, 0.0),
    LlamaTokenData::new(LlamaToken::new(1), 0.2, 0.0),
    LlamaTokenData::new(LlamaToken::new(2), 0.7, 0.0),
];
let mut candidates = LlamaTokenDataArray::from_iter(candidates, false);
candidates.sample_min_p(None, 0.05, 1);
pub fn sample_token_mirostat_v2(
    &mut self,
    ctx: &mut LlamaContext<'_>,
    tau: f32,
    eta: f32,
    mu: &mut f32,
) -> LlamaToken
Mirostat 2.0 algorithm described in the paper. Uses tokens instead of words.
Parameters

- tau - The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
- eta - The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.
- mu - Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
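Example

A minimal sketch; ctx is assumed to be a valid, already-initialized LlamaContext and candidates an already-populated LlamaTokenDataArray, both obtained elsewhere, and the tau/eta values are illustrative:

let tau = 5.0;
let eta = 0.1;
// mu is initialized to twice the target cross-entropy, as described above,
// and is updated in place on each call.
let mut mu = 2.0 * tau;
let token = candidates.sample_token_mirostat_v2(&mut ctx, tau, eta, &mut mu);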
Trait Implementations

impl Clone for LlamaTokenDataArray

fn clone(&self) -> LlamaTokenDataArray

fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.

impl Debug for LlamaTokenDataArray

impl PartialEq for LlamaTokenDataArray

fn eq(&self, other: &LlamaTokenDataArray) -> bool
This method tests for self and other values to be equal, and is used by ==.