pub struct Model { /* private fields */ }

Expand description
A KenLM model loaded from an ARPA or KenLM binary file.
Implementations§
Source§impl Model
impl Model
Sourcepub fn new(path: impl AsRef<Path>) -> Result<Self>
pub fn new(path: impl AsRef<Path>) -> Result<Self>
Load a language model with default configuration.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let scores = model.full_scores(sentence, true, true)?;
16
17 for (word, score) in sentence
18 .split_whitespace()
19 .chain(std::iter::once("</s>"))
20 .zip(scores)
21 {
22 println!(
23 "{word}\tlog10={:.6}\tngram_length={}\toov={}",
24 score.log_prob, score.ngram_length, score.oov
25 );
26 }
27
28 Ok(())
29}

More examples
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let score = model.score(sentence, true, true)?;
16 let fragment_score = model.score(sentence, false, false)?;
17 let perplexity = model.perplexity(sentence)?;
18
19 println!("sentence: {sentence}");
20 println!("order: {}", model.order());
21 println!("score with <s> and </s>: {score}");
22 println!("fragment score: {fragment_score}");
23 println!("perplexity: {perplexity}");
24
25 Ok(())
26}

4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn with_config(path: impl AsRef<Path>, config: Config) -> Result<Self>
pub fn with_config(path: impl AsRef<Path>, config: Config) -> Result<Self>
Load a language model with explicit configuration.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let model_path = env::args()
6 .nth(1)
7 .unwrap_or_else(|| "lm/test.arpa".to_string());
8
9 let config = Config {
10 show_progress: false,
11 arpa_complain: ArpaLoadComplain::None,
12 load_method: LoadMethod::Lazy,
13 ..Config::default()
14 };
15
16 let model = Model::with_config(model_path, config)?;
17
18 for word in ["looking", "definitely-not-in-this-model", "<s>", "</s>"] {
19 let index = model.index(word)?;
20 println!("{word}\tindex={index}\tin_vocab={}", model.contains(word)?);
21 }
22
23 Ok(())
24}

Sourcepub fn order(&self) -> u8
pub fn order(&self) -> u8
Return the n-gram order of the model.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let score = model.score(sentence, true, true)?;
16 let fragment_score = model.score(sentence, false, false)?;
17 let perplexity = model.perplexity(sentence)?;
18
19 println!("sentence: {sentence}");
20 println!("order: {}", model.order());
21 println!("score with <s> and </s>: {score}");
22 println!("fragment score: {fragment_score}");
23 println!("perplexity: {perplexity}");
24
25 Ok(())
26}

Sourcepub fn contains(&self, word: &str) -> Result<bool>
pub fn contains(&self, word: &str) -> Result<bool>
Return true when word exists in the model vocabulary.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let model_path = env::args()
6 .nth(1)
7 .unwrap_or_else(|| "lm/test.arpa".to_string());
8
9 let config = Config {
10 show_progress: false,
11 arpa_complain: ArpaLoadComplain::None,
12 load_method: LoadMethod::Lazy,
13 ..Config::default()
14 };
15
16 let model = Model::with_config(model_path, config)?;
17
18 for word in ["looking", "definitely-not-in-this-model", "<s>", "</s>"] {
19 let index = model.index(word)?;
20 println!("{word}\tindex={index}\tin_vocab={}", model.contains(word)?);
21 }
22
23 Ok(())
24}

Sourcepub fn index(&self, word: &str) -> Result<WordIndex>
pub fn index(&self, word: &str) -> Result<WordIndex>
Return KenLM’s vocabulary index for word, or the not-found index for OOV words.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let model_path = env::args()
6 .nth(1)
7 .unwrap_or_else(|| "lm/test.arpa".to_string());
8
9 let config = Config {
10 show_progress: false,
11 arpa_complain: ArpaLoadComplain::None,
12 load_method: LoadMethod::Lazy,
13 ..Config::default()
14 };
15
16 let model = Model::with_config(model_path, config)?;
17
18 for word in ["looking", "definitely-not-in-this-model", "<s>", "</s>"] {
19 let index = model.index(word)?;
20 println!("{word}\tindex={index}\tin_vocab={}", model.contains(word)?);
21 }
22
23 Ok(())
24}

More examples
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn begin_sentence_index(&self) -> WordIndex
pub fn begin_sentence_index(&self) -> WordIndex
Return the index for <s>.
Sourcepub fn end_sentence_index(&self) -> WordIndex
pub fn end_sentence_index(&self) -> WordIndex
Return the index for </s>.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn not_found_index(&self) -> WordIndex
pub fn not_found_index(&self) -> WordIndex
Return the vocabulary index used for out-of-vocabulary words.
Sourcepub fn score(&self, sentence: &str, bos: bool, eos: bool) -> Result<f32>
pub fn score(&self, sentence: &str, bos: bool, eos: bool) -> Result<f32>
Score a whitespace-tokenized sentence, returning log10 probability.
With bos = true and eos = true, this returns
log10 p(sentence </s> | <s>).
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let score = model.score(sentence, true, true)?;
16 let fragment_score = model.score(sentence, false, false)?;
17 let perplexity = model.perplexity(sentence)?;
18
19 println!("sentence: {sentence}");
20 println!("order: {}", model.order());
21 println!("score with <s> and </s>: {score}");
22 println!("fragment score: {fragment_score}");
23 println!("perplexity: {perplexity}");
24
25 Ok(())
26}

Sourcepub fn score_words<'a>(
&self,
words: impl IntoIterator<Item = &'a str>,
bos: bool,
eos: bool,
) -> Result<f32>
pub fn score_words<'a>( &self, words: impl IntoIterator<Item = &'a str>, bos: bool, eos: bool, ) -> Result<f32>
Score pre-tokenized words, returning log10 probability.
Sourcepub fn perplexity(&self, sentence: &str) -> Result<f32>
pub fn perplexity(&self, sentence: &str) -> Result<f32>
Return perplexity for a complete whitespace-tokenized sentence.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let score = model.score(sentence, true, true)?;
16 let fragment_score = model.score(sentence, false, false)?;
17 let perplexity = model.perplexity(sentence)?;
18
19 println!("sentence: {sentence}");
20 println!("order: {}", model.order());
21 println!("score with <s> and </s>: {score}");
22 println!("fragment score: {fragment_score}");
23 println!("perplexity: {perplexity}");
24
25 Ok(())
26}

Sourcepub fn full_scores(
&self,
sentence: &str,
bos: bool,
eos: bool,
) -> Result<Vec<TokenScore>>
pub fn full_scores( &self, sentence: &str, bos: bool, eos: bool, ) -> Result<Vec<TokenScore>>
Return per-token full scores for a whitespace-tokenized sentence.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let sentence = args.collect::<Vec<_>>().join(" ");
8 let sentence = if sentence.is_empty() {
9 "looking on a little"
10 } else {
11 sentence.as_str()
12 };
13
14 let model = Model::new(model_path)?;
15 let scores = model.full_scores(sentence, true, true)?;
16
17 for (word, score) in sentence
18 .split_whitespace()
19 .chain(std::iter::once("</s>"))
20 .zip(scores)
21 {
22 println!(
23 "{word}\tlog10={:.6}\tngram_length={}\toov={}",
24 score.log_prob, score.ngram_length, score.oov
25 );
26 }
27
28 Ok(())
29}

Sourcepub fn full_scores_words<'a>(
&self,
words: impl IntoIterator<Item = &'a str>,
bos: bool,
eos: bool,
) -> Result<Vec<TokenScore>>
pub fn full_scores_words<'a>( &self, words: impl IntoIterator<Item = &'a str>, bos: bool, eos: bool, ) -> Result<Vec<TokenScore>>
Return per-token full scores for pre-tokenized words.
Sourcepub fn begin_sentence_state(&self) -> State
pub fn begin_sentence_state(&self) -> State
Create a state initialized to beginning-of-sentence context.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn null_context_state(&self) -> State
pub fn null_context_state(&self) -> State
Create a state initialized to null context.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}

Sourcepub fn base_score(
&self,
in_state: &State,
word_index: WordIndex,
out_state: &mut State,
) -> Result<f32>
pub fn base_score( &self, in_state: &State, word_index: WordIndex, out_state: &mut State, ) -> Result<f32>
Score word_index from in_state, writing the next state into out_state.
Sourcepub fn base_full_score(
&self,
in_state: &State,
word_index: WordIndex,
out_state: &mut State,
) -> Result<FullScore>
pub fn base_full_score( &self, in_state: &State, word_index: WordIndex, out_state: &mut State, ) -> Result<FullScore>
Return KenLM’s full score metadata for a state transition.
Examples found in repository?
4fn main() -> Result<(), kenlm::KenlmError> {
5 let mut args = env::args().skip(1);
6 let model_path = args.next().unwrap_or_else(|| "lm/test.arpa".to_string());
7 let words = args.collect::<Vec<_>>();
8 let words = if words.is_empty() {
9 vec!["looking".to_string(), "on".to_string(), "a".to_string()]
10 } else {
11 words
12 };
13
14 let model = Model::new(model_path)?;
15 let mut state = model.begin_sentence_state();
16 let mut next = model.null_context_state();
17 let mut total = 0.0;
18
19 for word in &words {
20 let word_index = model.index(word)?;
21 let full = model.base_full_score(&state, word_index, &mut next)?;
22 total += full.log_prob;
23 println!(
24 "{word}\tindex={word_index}\tlog10={:.6}\tngram_length={}",
25 full.log_prob, full.ngram_length
26 );
27 std::mem::swap(&mut state, &mut next);
28 }
29
30 let eos = model.base_full_score(&state, model.end_sentence_index(), &mut next)?;
31 total += eos.log_prob;
32 println!(
33 "</s>\tindex={}\tlog10={:.6}\tngram_length={}",
34 model.end_sentence_index(),
35 eos.log_prob,
36 eos.ngram_length
37 );
38 println!("total: {total}");
39
40 Ok(())
41}