pub struct Model { /* private fields */ }

Expand description
A KenLM model loaded from an ARPA or KenLM binary file.
Implementations§
Source§impl Model
impl Model
Sourcepub fn new(path: impl AsRef<Path>) -> Result<Self>
pub fn new(path: impl AsRef<Path>) -> Result<Self>
Load a language model with default configuration.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let scores = model.full_scores(sentence, true, true)?;
16
17 for (word, score) in sentence
18 .split_whitespace()
19 .chain(std::iter::once("</s>"))
20 .zip(scores)
21 {
22 println!(
23 "{word}\tlog10={:.6}\tngram_length={}\toov={}",
24 score.log_prob, score.ngram_length, score.oov
25 );
26 }
27
28 Ok(())
29}

More examples
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let score = model.score(sentence, true, true)?;
16 let fragment_score = model.score(sentence, false, false)?;
17 let perplexity = model.perplexity(sentence)?;
18
19 println!("sentence: {sentence}");
20 println!("order: {}", model.order());
21 println!("score with <s> and </s>: {score}");
22 println!("fragment score: {fragment_score}");
23 println!("perplexity: {perplexity}");
24
25 Ok(())
26}

4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn with_config(path: impl AsRef<Path>, config: Config) -> Result<Self>
pub fn with_config(path: impl AsRef<Path>, config: Config) -> Result<Self>
Load a language model with explicit configuration.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let model_path = env::args()
6 .nth(1)
7 .unwrap_or_else(|| "lm/test.arpa".to_string());
8
9 let config = Config {
10 show_progress: false,
11 arpa_complain: ArpaLoadComplain::None,
12 load_method: LoadMethod::Lazy,
13 ..Config::default()
14 };
15
16 let model = Model::with_config(model_path, config)?;
17
18 for word in ["looking", "definitely-not-in-this-model", "<s>", "</s>"] {
19 let index = model.index(word)?;
20 println!("{word}\tindex={index}\tin_vocab={}", model.contains(word)?);
21 }
22
23 Ok(())
24}

Sourcepub fn order(&self) -> u8
pub fn order(&self) -> u8
Return the n-gram order of the model.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let score = model.score(sentence, true, true)?;
16 let fragment_score = model.score(sentence, false, false)?;
17 let perplexity = model.perplexity(sentence)?;
18
19 println!("sentence: {sentence}");
20 println!("order: {}", model.order());
21 println!("score with <s> and </s>: {score}");
22 println!("fragment score: {fragment_score}");
23 println!("perplexity: {perplexity}");
24
25 Ok(())
26}

Sourcepub fn contains(&self, word: &str) -> Result<bool>
pub fn contains(&self, word: &str) -> Result<bool>
Return true when word exists in the model vocabulary.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let model_path = env::args()
6 .nth(1)
7 .unwrap_or_else(|| "lm/test.arpa".to_string());
8
9 let config = Config {
10 show_progress: false,
11 arpa_complain: ArpaLoadComplain::None,
12 load_method: LoadMethod::Lazy,
13 ..Config::default()
14 };
15
16 let model = Model::with_config(model_path, config)?;
17
18 for word in ["looking", "definitely-not-in-this-model", "<s>", "</s>"] {
19 let index = model.index(word)?;
20 println!("{word}\tindex={index}\tin_vocab={}", model.contains(word)?);
21 }
22
23 Ok(())
24}

Sourcepub fn index(&self, word: &str) -> Result<WordIndex>
pub fn index(&self, word: &str) -> Result<WordIndex>
Return KenLM’s vocabulary index for word, or the not-found index for OOV words.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let model_path = env::args()
6 .nth(1)
7 .unwrap_or_else(|| "lm/test.arpa".to_string());
8
9 let config = Config {
10 show_progress: false,
11 arpa_complain: ArpaLoadComplain::None,
12 load_method: LoadMethod::Lazy,
13 ..Config::default()
14 };
15
16 let model = Model::with_config(model_path, config)?;
17
18 for word in ["looking", "definitely-not-in-this-model", "<s>", "</s>"] {
19 let index = model.index(word)?;
20 println!("{word}\tindex={index}\tin_vocab={}", model.contains(word)?);
21 }
22
23 Ok(())
24}

More examples
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn begin_sentence_index(&self) -> WordIndex
pub fn begin_sentence_index(&self) -> WordIndex
Return the index for <s>.
Sourcepub fn end_sentence_index(&self) -> WordIndex
pub fn end_sentence_index(&self) -> WordIndex
Return the index for </s>.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn not_found_index(&self) -> WordIndex
pub fn not_found_index(&self) -> WordIndex
Return the vocabulary index used for out-of-vocabulary words.
Sourcepub fn score(&self, sentence: &str, bos: bool, eos: bool) -> Result<f32>
pub fn score(&self, sentence: &str, bos: bool, eos: bool) -> Result<f32>
Score a whitespace-tokenized sentence, returning log10 probability.
With bos = true and eos = true, this returns
log10 p(sentence </s> | <s>).
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let score = model.score(sentence, true, true)?;
16 let fragment_score = model.score(sentence, false, false)?;
17 let perplexity = model.perplexity(sentence)?;
18
19 println!("sentence: {sentence}");
20 println!("order: {}", model.order());
21 println!("score with <s> and </s>: {score}");
22 println!("fragment score: {fragment_score}");
23 println!("perplexity: {perplexity}");
24
25 Ok(())
26}

Sourcepub fn score_words<'a>(
&self,
words: impl IntoIterator<Item = &'a str>,
bos: bool,
eos: bool,
) -> Result<f32>
pub fn score_words<'a>( &self, words: impl IntoIterator<Item = &'a str>, bos: bool, eos: bool, ) -> Result<f32>
Score pre-tokenized words, returning log10 probability.
Sourcepub fn perplexity(&self, sentence: &str) -> Result<f32>
pub fn perplexity(&self, sentence: &str) -> Result<f32>
Return perplexity for a complete whitespace-tokenized sentence.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let score = model.score(sentence, true, true)?;
16 let fragment_score = model.score(sentence, false, false)?;
17 let perplexity = model.perplexity(sentence)?;
18
19 println!("sentence: {sentence}");
20 println!("order: {}", model.order());
21 println!("score with <s> and </s>: {score}");
22 println!("fragment score: {fragment_score}");
23 println!("perplexity: {perplexity}");
24
25 Ok(())
26}

Sourcepub fn full_scores(
&self,
sentence: &str,
bos: bool,
eos: bool,
) -> Result<Vec<TokenScore>>
pub fn full_scores( &self, sentence: &str, bos: bool, eos: bool, ) -> Result<Vec<TokenScore>>
Return per-token full scores for a whitespace-tokenized sentence.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let scores = model.full_scores(sentence, true, true)?;
16
17 for (word, score) in sentence
18 .split_whitespace()
19 .chain(std::iter::once("</s>"))
20 .zip(scores)
21 {
22 println!(
23 "{word}\tlog10={:.6}\tngram_length={}\toov={}",
24 score.log_prob, score.ngram_length, score.oov
25 );
26 }
27
28 Ok(())
29}

Sourcepub fn full_scores_words<'a>(
&self,
words: impl IntoIterator<Item = &'a str>,
bos: bool,
eos: bool,
) -> Result<Vec<TokenScore>>
pub fn full_scores_words<'a>( &self, words: impl IntoIterator<Item = &'a str>, bos: bool, eos: bool, ) -> Result<Vec<TokenScore>>
Return per-token full scores for pre-tokenized words.
Sourcepub fn begin_sentence_state(&self) -> State
pub fn begin_sentence_state(&self) -> State
Create a state initialized to beginning-of-sentence context.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn null_context_state(&self) -> State
pub fn null_context_state(&self) -> State
Create a state initialized to null context.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn base_score(
&self,
in_state: &State,
word_index: WordIndex,
out_state: &mut State,
) -> Result<f32>
pub fn base_score( &self, in_state: &State, word_index: WordIndex, out_state: &mut State, ) -> Result<f32>
Score word_index from in_state, writing the next state into out_state.
Sourcepub fn base_full_score(
&self,
in_state: &State,
word_index: WordIndex,
out_state: &mut State,
) -> Result<FullScore>
pub fn base_full_score( &self, in_state: &State, word_index: WordIndex, out_state: &mut State, ) -> Result<FullScore>
Return KenLM’s full score metadata for a state transition.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}