1use std::ops::{Deref, DerefMut};
2
3use crate::LTerm;
4
5#[derive(Debug, Default, Clone, Eq, PartialEq)]
12pub struct StopWords {
13 set: hashbrown::HashSet<LTerm>,
14}
15
16impl StopWords {
17 pub fn custom(lowercased: std::collections::HashSet<LTerm>) -> Self {
19 Self::from(lowercased)
20 }
21
22 pub fn predefined(lang_iso_639_2: &str) -> Option<Self> {
27 let file = match lang_iso_639_2 {
33 #[cfg(feature = "ar")]
34 "ar" => include_str!("stopwords/ar.txt"),
35 #[cfg(feature = "bg")]
36 "bg" => include_str!("stopwords/bg.txt"),
37 #[cfg(feature = "br")]
38 "br" => include_str!("stopwords/br.txt"),
39 #[cfg(feature = "cz")]
40 "cz" => include_str!("stopwords/cz.txt"),
41 #[cfg(feature = "da")]
42 "da" => include_str!("stopwords/da.txt"),
43 #[cfg(feature = "de")]
44 "de" => include_str!("stopwords/de.txt"),
45 #[cfg(feature = "el")]
46 "el" => include_str!("stopwords/el.txt"),
47 #[cfg(feature = "en")]
48 "en" => include_str!("stopwords/en.txt"),
49 #[cfg(feature = "es")]
50 "es" => include_str!("stopwords/es.txt"),
51 #[cfg(feature = "et")]
52 "et" => include_str!("stopwords/et.txt"),
53 #[cfg(feature = "fa")]
54 "fa" => include_str!("stopwords/fa.txt"),
55 #[cfg(feature = "fi")]
56 "fi" => include_str!("stopwords/fi.txt"),
57 #[cfg(feature = "fr")]
58 "fr" => include_str!("stopwords/fr.txt"),
59 #[cfg(feature = "hi")]
60 "hi" => include_str!("stopwords/hi.txt"),
61 #[cfg(feature = "hr")]
62 "hr" => include_str!("stopwords/hr.txt"),
63 #[cfg(feature = "hu")]
64 "hu" => include_str!("stopwords/hu.txt"),
65 #[cfg(feature = "hy")]
66 "hy" => include_str!("stopwords/hy.txt"),
67 #[cfg(feature = "id")]
68 "id" => include_str!("stopwords/id.txt"),
69 #[cfg(feature = "it")]
70 "it" => include_str!("stopwords/it.txt"),
71 #[cfg(feature = "ja")]
72 "ja" => include_str!("stopwords/ja.txt"),
73 #[cfg(feature = "lt")]
74 "lt" => include_str!("stopwords/lt.txt"),
75 #[cfg(feature = "lv")]
76 "lv" => include_str!("stopwords/lv.txt"),
77 #[cfg(feature = "nl")]
78 "nl" => include_str!("stopwords/nl.txt"),
79 #[cfg(feature = "no")]
80 "no" => include_str!("stopwords/no.txt"),
81 #[cfg(feature = "pl")]
82 "pl" => include_str!("stopwords/pl.txt"),
83 #[cfg(feature = "pt")]
84 "pt" => include_str!("stopwords/pt.txt"),
85 #[cfg(feature = "ro")]
86 "ro" => include_str!("stopwords/ro.txt"),
87 #[cfg(feature = "ru")]
88 "ru" => include_str!("stopwords/ru.txt"),
89 #[cfg(feature = "sk")]
90 "sk" => include_str!("stopwords/sk.txt"),
91 #[cfg(feature = "sl")]
92 "sl" => include_str!("stopwords/sl.txt"),
93 #[cfg(feature = "sv")]
94 "sv" => include_str!("stopwords/sv.txt"),
95 #[cfg(feature = "tr")]
96 "tr" => include_str!("stopwords/tr.txt"),
97 #[cfg(feature = "uk")]
98 "uk" => include_str!("stopwords/uk.txt"),
99 #[cfg(feature = "zh")]
100 "zh" => include_str!("stopwords/zh.txt"),
101 _ => return None,
102 };
103
104 Some(Self { set: file.lines().map(ToOwned::to_owned).collect() })
105 }
106}
107
108impl From<hashbrown::HashSet<LTerm>> for StopWords {
109 fn from(lowercased: hashbrown::HashSet<LTerm>) -> Self {
110 Self { set: lowercased.into_iter().collect() }
111 }
112}
113
114impl From<std::collections::HashSet<LTerm>> for StopWords {
115 fn from(lowercased: std::collections::HashSet<LTerm>) -> Self {
116 Self { set: lowercased.into_iter().collect() }
117 }
118}
119
120impl Deref for StopWords {
121 type Target = hashbrown::HashSet<LTerm>;
122
123 fn deref(&self) -> &Self::Target {
124 &self.set
125 }
126}
127
128impl<T> AsRef<T> for StopWords
129where
130 T: ?Sized,
131 <StopWords as Deref>::Target: AsRef<T>,
132{
133 fn as_ref(&self) -> &T {
134 self.deref().as_ref()
135 }
136}
137
138impl DerefMut for StopWords {
139 fn deref_mut(&mut self) -> &mut Self::Target {
140 &mut self.set
141 }
142}