1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
use lazy_static::lazy_static;
use std::collections::BTreeSet;
#[cfg(feature = "textrank")]
pub mod textrank;
#[cfg(feature = "tfidf")]
pub mod tfidf;
lazy_static! {
    /// Set of common English words ("the", "of", "is", ...), stored as owned strings.
    ///
    /// The set is built from a fixed literal list the first time the static is
    /// dereferenced and is never modified by this module afterwards.
    ///
    /// NOTE(review): presumably the extractors in `textrank` / `tfidf` use this
    /// set to skip such words -- confirm against those modules.
    pub static ref STOP_WORDS: BTreeSet<String> = [
        "the", "of", "is", "and", "to", "in", "that", "we", "for", "an", "are", "by", "be", "as",
        "on", "with", "can", "if", "from", "which", "you", "it", "this", "then", "at", "have",
        "all", "not", "one", "has", "or",
    ]
    .iter()
    .map(|&word| String::from(word))
    .collect();
}
/// A keyword together with its numeric weight.
///
/// This is the element type of the list returned by
/// [`KeywordExtract::extract_tags`].
#[derive(Clone, Debug)]
pub struct Keyword {
    /// The keyword text.
    pub keyword: String,

    /// The weight attached to `keyword`.
    ///
    /// NOTE(review): presumably the score assigned by the extractor that
    /// produced this value (higher meaning more relevant) -- confirm against
    /// the implementations.
    pub weight: f64,
}
/// Common interface for keyword extractors.
///
/// Implementors take a piece of text and produce a list of [`Keyword`]s.
pub trait KeywordExtract {
    /// Extracts keywords from `sentence`.
    ///
    /// # Parameters
    ///
    /// * `sentence` - the text to extract keywords from.
    /// * `top_k` - NOTE(review): presumably the maximum number of keywords to
    ///   return -- confirm against the implementations.
    /// * `allowed_pos` - NOTE(review): presumably a list of part-of-speech tags
    ///   used to filter candidate words -- confirm against the implementations.
    ///
    /// # Returns
    ///
    /// The extracted keywords, each paired with its weight.
    fn extract_tags(
        &self,
        sentence: &str,
        top_k: usize,
        allowed_pos: Vec<String>,
    ) -> Vec<Keyword>;
}