use std::sync::LazyLock;
use std::collections::HashSet;
/// Set of lower-case English function words (articles, pronouns, auxiliaries,
/// prepositions, conjunctions, common adverbs and contractions).
///
/// The set is built once, on first access, and then shared for the lifetime of
/// the program. Lookups are exact string matches, so:
///
/// * every entry is lower-case — a token such as `"The"` will not match;
/// * contractions use the ASCII apostrophe (`'`), so a typographic apostrophe
///   (`’`) will not match.
///
/// NOTE(review): callers presumably normalise tokens before lookup — confirm.
pub static STOPWORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    // `HashSet::from` takes the array by value; each word is listed exactly once.
    HashSet::from([
        // Articles.
        "the", "a", "an",
        // Personal pronouns (subject form).
        "i", "you", "he", "she", "it", "we", "they",
        // Personal pronouns (object form).
        "me", "him", "her", "us", "them",
        // Possessive determiners.
        "my", "your", "his", "its", "our", "their",
        // Demonstratives ("that" doubles as a conjunction; it is listed only here).
        "this", "that", "these", "those",
        // Forms of "to be".
        "is", "are", "was", "were", "be", "been", "being",
        // Forms of "to have".
        "have", "has", "had", "having",
        // Forms of "to do".
        "do", "does", "did", "done",
        // Modal verbs.
        "will", "would", "shall", "should", "can", "could", "may", "might", "must",
        // Prepositions.
        "of", "in", "on", "at", "to", "from", "for", "by", "with", "about",
        "into", "through", "during", "before", "after", "above", "below",
        "between", "under", "over",
        // Conjunctions.
        "and", "or", "but", "nor", "so", "yet", "if", "because", "as", "than",
        "while", "when", "where", "whether", "although",
        // Negation, affirmation and common adverbs.
        "not", "no", "yes", "only", "just", "also", "very", "too", "here",
        "there", "now", "then",
        // Negative contractions.
        "don't", "doesn't", "didn't", "won't", "wouldn't", "can't", "couldn't",
        "shouldn't", "isn't", "aren't", "wasn't", "weren't", "haven't",
        "hasn't", "hadn't",
        // Pronoun + "be" contractions.
        "i'm", "you're", "he's", "she's", "it's", "we're", "they're",
        // Pronoun + "have" contractions.
        "i've", "you've", "we've", "they've",
        // Pronoun + "will" contractions.
        "i'll", "you'll", "he'll", "she'll", "we'll", "they'll",
    ])
});
/// Vague quantifiers, hedges and filler adverbs ("weasel words").
///
/// The list is built once, on first access, and keeps the order written here.
/// Entries are lower-case; the last three are multi-word phrases, so they
/// cannot be matched by comparing single whitespace-separated tokens.
///
/// NOTE(review): "just" also appears in `STOPWORDS` — confirm the overlap is
/// intended.
pub static WEASELS: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
    Vec::from([
        // Vague quantities and frequencies.
        "some", "many", "often",
        // Filler and "self-evident" adverbs.
        "just", "simply", "clearly", "obviously", "seemingly", "arguably",
        "basically", "essentially", "virtually",
        // Unspecific amounts.
        "various", "numerous",
        // Softeners.
        "rather", "quite",
        // Multi-word hedges.
        "sort of", "kind of", "a bit",
    ])
});
/// Lower-case English negation words, in a fixed order.
///
/// Both the hyphenated ("no-one") and the closed ("noone") spellings are
/// listed. Contracted negations such as "don't" are not part of this list.
pub static NEGATIONS: &[&str] = &[
    // Plain negators.
    "not",
    "no",
    "never",
    // Negative pronouns and adverbs.
    "none",
    "nothing",
    "nobody",
    "no-one",
    "noone",
    "nowhere",
    // Negative correlatives.
    "neither",
    "nor",
    // Other negative forms.
    "cannot",
    "without",
];
/// Lower-case words and phrases that introduce a condition, in a fixed order.
///
/// Single-word markers come first, followed by multi-word phrases. Some
/// phrases contain a single-word marker ("even if" contains "if"), so the
/// order in which a caller tries entries can matter.
pub static CONDITIONALS: &[&str] = &[
    // Single-word markers.
    "if", "unless", "when", "whenever", "while", "until", "provided", "assuming",
    // Multi-word phrases.
    "in case", "as long as", "as soon as", "even if", "only if",
];
/// Lower-case English intensifying adverbs, in a fixed order.
///
/// Every entry is a single word.
pub static INTENSIFIERS: &[&str] = &[
    "very", "really", "extremely", "absolutely", "totally", "completely", "utterly",
    "terribly", "awfully", "incredibly", "highly", "deeply", "super",
];
/// Lower-case English number words, in ascending order.
///
/// Covers "two" through "nineteen", the tens from "twenty" to "ninety", and
/// the magnitudes "hundred" through "billion". Compound forms such as
/// "twenty-one" are not listed.
///
/// NOTE(review): "one" (and "zero") are absent — presumably deliberate, since
/// "one" is also a pronoun; confirm before adding.
pub static SPELLED_NUMERALS: &[&str] = &[
    // Units.
    "two", "three", "four", "five", "six", "seven", "eight", "nine",
    // Ten and the teens.
    "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
    "seventeen", "eighteen", "nineteen",
    // Tens.
    "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
    // Magnitudes.
    "hundred", "thousand", "million", "billion",
];
#[cfg(test)]
mod tests {
    use super::*;

    /// The three English articles are all treated as stopwords.
    #[test]
    fn contains_common_articles() {
        for word in ["the", "a", "an"] {
            assert!(STOPWORDS.contains(word), "expected {word:?} to be a stopword");
        }
    }

    /// One representative each of the "be", "have" and modal auxiliaries.
    #[test]
    fn contains_common_auxiliaries() {
        for word in ["is", "have", "will"] {
            assert!(STOPWORDS.contains(word), "expected {word:?} to be a stopword");
        }
    }

    /// Ordinary content words must not be filtered out as stopwords.
    #[test]
    fn does_not_contain_content_words() {
        for word in ["accessibility", "linter"] {
            assert!(!STOPWORDS.contains(word), "did not expect {word:?} to be a stopword");
        }
    }
}