use std::sync::LazyLock;
use std::collections::HashSet;
/// Lowercase English function words (articles, pronouns, auxiliaries,
/// prepositions, conjunctions, common adverbs and contractions).
///
/// The set is built on first access and then shared for the life of the
/// program. Every entry is lowercase and contractions use the ASCII
/// apostrophe (`'`), so lookups are exact-match only.
/// NOTE(review): callers presumably lowercase tokens and normalise curly
/// apostrophes before calling `contains` — confirm at the call sites.
pub static STOPWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // Articles
        "the", "a", "an",
        // Subject pronouns
        "i", "you", "he", "she", "it", "we", "they",
        // Object pronouns
        "me", "him", "her", "us", "them",
        // Possessive determiners
        "my", "your", "his", "its", "our", "their",
        // Demonstratives
        "this", "that", "these", "those",
        // Forms of "be"
        "is", "are", "was", "were", "be", "been", "being",
        // Forms of "have"
        "have", "has", "had", "having",
        // Forms of "do"
        "do", "does", "did", "done",
        // Modal auxiliaries
        "will", "would", "shall", "should", "can", "could", "may", "might", "must",
        // Prepositions
        "of", "in", "on", "at", "to", "from", "for", "by", "with", "about",
        "into", "through", "during", "before", "after", "above", "below",
        "between", "under", "over",
        // Coordinating conjunctions
        "and", "or", "but", "nor", "so", "yet",
        // Subordinating conjunctions. "that" also belongs here but is already
        // listed under demonstratives; a set keeps a single copy either way.
        "if", "because", "as", "than", "while", "when", "where", "whether", "although",
        // Common adverbs and particles
        "not", "no", "yes", "only", "just", "also", "very", "too", "here",
        "there", "now", "then",
        // Negative contractions
        "don't", "doesn't", "didn't", "won't", "wouldn't", "can't", "couldn't",
        "shouldn't", "isn't", "aren't", "wasn't", "weren't", "haven't", "hasn't",
        "hadn't",
        // Contractions with "be"
        "i'm", "you're", "he's", "she's", "it's", "we're", "they're",
        // Contractions with "have"
        "i've", "you've", "we've", "they've",
        // Contractions with "will"
        "i'll", "you'll", "he'll", "she'll", "we'll", "they'll",
    ])
});
/// Words and short phrases that hedge or vaguely quantify a statement.
///
/// The vector is built on first access. Some entries are multi-word phrases
/// ("sort of", "kind of", "a bit") and therefore contain a space.
/// NOTE(review): matching those presumably needs phrase-level comparison
/// rather than single-token lookup — confirm against the consumer.
pub static WEASELS: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
    Vec::from([
        // Vague quantity / frequency
        "some", "many", "often",
        // Minimisers and assertion boosters
        "just", "simply", "clearly", "obviously",
        // Hedged certainty
        "seemingly", "arguably", "basically", "essentially", "virtually",
        // Unspecified amounts
        "various", "numerous",
        // Degree hedges
        "rather", "quite",
        // Multi-word hedges
        "sort of", "kind of", "a bit",
    ])
});
/// Lowercase English negation words.
///
/// Both the hyphenated and the closed spelling of "no one" are listed
/// ("no-one", "noone"). "cannot" is included; negative contractions such as
/// "can't" are not part of this list.
pub static NEGATIONS: &[&str] = &[
    // Plain negators
    "not",
    "no",
    "never",
    // Negative pronouns and adverbs
    "none",
    "nothing",
    "nobody",
    "no-one",
    "noone",
    "nowhere",
    // Negative correlatives
    "neither",
    "nor",
    // Miscellaneous
    "cannot",
    "without",
];
/// Lowercase English words and phrases that introduce a condition.
///
/// Single-word entries come first, followed by multi-word phrases (which
/// contain spaces).
pub static CONDITIONALS: &[&str] = &[
    // Single-word markers
    "if", "unless", "when", "whenever", "while", "until", "provided", "assuming",
    // Multi-word markers
    "in case", "as long as", "as soon as", "even if", "only if",
];
/// Lowercase English adverbs that amplify the word they modify.
pub static INTENSIFIERS: &[&str] = &[
    "very", "really", "extremely", "absolutely",
    "totally", "completely", "utterly",
    "terribly", "awfully", "incredibly",
    "highly", "deeply", "super",
];
/// Groups of English words that sound alike (or nearly alike) and are
/// commonly mistaken for one another.
///
/// Each inner slice is one group; all entries are lowercase.
pub static HOMOPHONE_GROUPS_EN: &[&[&str]] = &[
    &["their", "there", "they're"],  // possessive / place / "they are"
    &["your", "you're"],             // possessive / "you are"
    &["to", "too", "two"],           // preposition / "also" / numeral
    &["its", "it's"],                // possessive / "it is"
    &["affect", "effect"],           // usually verb / usually noun
    &["principal", "principle"],     // chief / rule
    &["weather", "whether"],         // climate / conjunction
    &["lose", "loose"],              // misplace / not tight
];
/// Lowercase English number words, from "two" up to "billion".
///
/// NOTE(review): "zero" and "one" are not listed — presumably on purpose,
/// since "one" is also a common pronoun; confirm before adding them.
pub static SPELLED_NUMERALS: &[&str] = &[
    // Units (starting at two)
    "two", "three", "four", "five", "six", "seven", "eight", "nine",
    // Ten through nineteen
    "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
    "seventeen", "eighteen", "nineteen",
    // Tens
    "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
    // Scale words
    "hundred", "thousand", "million", "billion",
];
/// Lowercase English phrases that frame a number by comparison,
/// approximation, or bound.
///
/// Four entries deliberately end in a space: `"about "`, `"around "`,
/// `"near "` and `"nearly "`. Do not trim them.
/// NOTE(review): the trailing space presumably stops substring matching from
/// firing inside longer words — confirm against the matcher.
pub static ANCHOR_COMPARATORS_EN: &[&str] = &[
    // Ratios
    "out of", "of every", "as many as",
    // Size analogies
    "the size of", "the population of",
    // Explicit comparison
    "compared to", "compared with",
    // Equivalence
    "equivalent to", "the equivalent of", "equal to",
    // Approximation (note the trailing spaces on the last two)
    "roughly", "approximately", "about ", "around ",
    // Relative bounds
    "more than", "less than", "no more than", "no less than",
    // Absolute bounds
    "at least", "at most", "up to",
    // Averages
    "averaging", "an average of",
    // Conversions
    "translates to", "amounts to",
    // Proximity (both carry a trailing space)
    "near ", "nearly ",
];
#[cfg(test)]
mod tests {
    use super::*;

    /// The three English articles are all stopwords.
    #[test]
    fn contains_common_articles() {
        for article in ["the", "a", "an"] {
            assert!(STOPWORDS.contains(article), "expected stopword: {article}");
        }
    }

    /// One representative each of "be", "have" and the modals is a stopword.
    #[test]
    fn contains_common_auxiliaries() {
        for auxiliary in ["is", "have", "will"] {
            assert!(STOPWORDS.contains(auxiliary), "expected stopword: {auxiliary}");
        }
    }

    /// Ordinary content words must not be treated as stopwords.
    #[test]
    fn does_not_contain_content_words() {
        for content_word in ["accessibility", "linter"] {
            assert!(
                !STOPWORDS.contains(content_word),
                "unexpected stopword: {content_word}"
            );
        }
    }
}