use std::ops::{Deref, DerefMut};
use crate::LTerm;
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct StopWords {
set: hashbrown::HashSet<LTerm>,
}
impl StopWords {
pub fn custom(lowercased: std::collections::HashSet<LTerm>) -> Self {
Self::from(lowercased)
}
pub fn predefined(lang_iso_639_2: &str) -> Option<Self> {
let file = match lang_iso_639_2 {
#[cfg(feature = "ar")]
"ar" => include_str!("stopwords/ar.txt"),
#[cfg(feature = "bg")]
"bg" => include_str!("stopwords/bg.txt"),
#[cfg(feature = "br")]
"br" => include_str!("stopwords/br.txt"),
#[cfg(feature = "cz")]
"cz" => include_str!("stopwords/cz.txt"),
#[cfg(feature = "da")]
"da" => include_str!("stopwords/da.txt"),
#[cfg(feature = "de")]
"de" => include_str!("stopwords/de.txt"),
#[cfg(feature = "el")]
"el" => include_str!("stopwords/el.txt"),
#[cfg(feature = "en")]
"en" => include_str!("stopwords/en.txt"),
#[cfg(feature = "es")]
"es" => include_str!("stopwords/es.txt"),
#[cfg(feature = "et")]
"et" => include_str!("stopwords/et.txt"),
#[cfg(feature = "fa")]
"fa" => include_str!("stopwords/fa.txt"),
#[cfg(feature = "fi")]
"fi" => include_str!("stopwords/fi.txt"),
#[cfg(feature = "fr")]
"fr" => include_str!("stopwords/fr.txt"),
#[cfg(feature = "hi")]
"hi" => include_str!("stopwords/hi.txt"),
#[cfg(feature = "hr")]
"hr" => include_str!("stopwords/hr.txt"),
#[cfg(feature = "hu")]
"hu" => include_str!("stopwords/hu.txt"),
#[cfg(feature = "hy")]
"hy" => include_str!("stopwords/hy.txt"),
#[cfg(feature = "id")]
"id" => include_str!("stopwords/id.txt"),
#[cfg(feature = "it")]
"it" => include_str!("stopwords/it.txt"),
#[cfg(feature = "ja")]
"ja" => include_str!("stopwords/ja.txt"),
#[cfg(feature = "lt")]
"lt" => include_str!("stopwords/lt.txt"),
#[cfg(feature = "lv")]
"lv" => include_str!("stopwords/lv.txt"),
#[cfg(feature = "nl")]
"nl" => include_str!("stopwords/nl.txt"),
#[cfg(feature = "no")]
"no" => include_str!("stopwords/no.txt"),
#[cfg(feature = "pl")]
"pl" => include_str!("stopwords/pl.txt"),
#[cfg(feature = "pt")]
"pt" => include_str!("stopwords/pt.txt"),
#[cfg(feature = "ro")]
"ro" => include_str!("stopwords/ro.txt"),
#[cfg(feature = "ru")]
"ru" => include_str!("stopwords/ru.txt"),
#[cfg(feature = "sk")]
"sk" => include_str!("stopwords/sk.txt"),
#[cfg(feature = "sl")]
"sl" => include_str!("stopwords/sl.txt"),
#[cfg(feature = "sv")]
"sv" => include_str!("stopwords/sv.txt"),
#[cfg(feature = "tr")]
"tr" => include_str!("stopwords/tr.txt"),
#[cfg(feature = "uk")]
"uk" => include_str!("stopwords/uk.txt"),
#[cfg(feature = "zh")]
"zh" => include_str!("stopwords/zh.txt"),
_ => return None,
};
Some(Self { set: file.lines().map(ToOwned::to_owned).collect() })
}
}
impl From<hashbrown::HashSet<LTerm>> for StopWords {
fn from(lowercased: hashbrown::HashSet<LTerm>) -> Self {
Self { set: lowercased.into_iter().collect() }
}
}
impl From<std::collections::HashSet<LTerm>> for StopWords {
fn from(lowercased: std::collections::HashSet<LTerm>) -> Self {
Self { set: lowercased.into_iter().collect() }
}
}
impl Deref for StopWords {
type Target = hashbrown::HashSet<LTerm>;
fn deref(&self) -> &Self::Target {
&self.set
}
}
impl<T> AsRef<T> for StopWords
where
T: ?Sized,
<StopWords as Deref>::Target: AsRef<T>,
{
fn as_ref(&self) -> &T {
self.deref().as_ref()
}
}
impl DerefMut for StopWords {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.set
}
}