Function rnltk::token::get_stemmed_term_frequencies_from_sentences_without_stop_words

source ·

pub fn get_stemmed_term_frequencies_from_sentences_without_stop_words(
    sentences: &[&str],
    stop_words: Vec<String>
) -> Vec<BTreeMap<String, f64>>

Expand description

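Computes stemmed term frequencies for each sentence in `sentences`, ignoring any word found in `stop_words`. The result holds one map per sentence, in input order; every map is keyed by the same set of stemmed terms collected across all sentences, and a term that does not occur in a given sentence has the value `0.`.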

Examples

use std::collections::BTreeMap;
use rnltk::token;
 
let sentences = vec!["fear leads to anger", "anger leads to hatred", "hatred leads to conflict", "conflict leads to suffering."];
let stop_words = token::get_stop_words();
let word_counts1 = BTreeMap::from([
    ("fear".to_string(), 1.), ("lead".to_string(), 1.), ("anger".to_string(), 1.), ("hatr".to_string(), 0.), ("conflict".to_string(), 0.), ("suffer".to_string(), 0.)
]);
let word_counts2 = BTreeMap::from([
    ("fear".to_string(), 0.), ("lead".to_string(), 1.), ("anger".to_string(), 1.), ("hatr".to_string(), 1.), ("conflict".to_string(), 0.), ("suffer".to_string(), 0.)
]);
let word_counts3 = BTreeMap::from([
    ("fear".to_string(), 0.), ("lead".to_string(), 1.), ("anger".to_string(), 0.), ("hatr".to_string(), 1.), ("conflict".to_string(), 1.), ("suffer".to_string(), 0.)
]);
let word_counts4 = BTreeMap::from([
    ("fear".to_string(), 0.), ("lead".to_string(), 1.), ("anger".to_string(), 0.), ("hatr".to_string(), 0.), ("conflict".to_string(), 1.), ("suffer".to_string(), 1.)
]);
let term_frequencies = token::get_stemmed_term_frequencies_from_sentences_without_stop_words(&sentences, stop_words);

assert_eq!(vec![word_counts1, word_counts2, word_counts3, word_counts4], term_frequencies);