1use alloc::collections::BTreeMap;
30use alloc::string::String;
31use alloc::vec::Vec;
32
/// Lookup table from a canonical word to the list of its synonyms.
///
/// Entries are stored in a [`BTreeMap`] keyed by the canonical word; each
/// value keeps its synonyms in the order they were encountered while parsing.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SynonymMap(BTreeMap<String, Vec<String>>);

impl SynonymMap {
    /// Creates a map that contains no entries.
    pub fn empty() -> Self {
        SynonymMap(BTreeMap::new())
    }

    /// Builds a map from tab-separated text.
    ///
    /// Every data line has the shape `canonical<TAB>synonym[<TAB>synonym...]`.
    ///
    /// Parsing rules:
    /// - Blank lines and lines whose first non-whitespace character is `#`
    ///   are ignored.
    /// - Surrounding whitespace is trimmed from the canonical word and from
    ///   every synonym; fields that are empty after trimming are dropped.
    /// - Lines that do not yield a canonical word plus at least one synonym
    ///   are ignored.
    /// - When the same canonical word appears on several lines, the synonyms
    ///   of later lines are appended to the existing entry.
    pub fn from_tsv(data: &str) -> Self {
        let mut map: BTreeMap<String, Vec<String>> = BTreeMap::new();
        for line in data.lines() {
            let line = line.trim();
            if line.is_empty() || line.starts_with('#') {
                continue;
            }
            // Trim each field individually so that the canonical key is
            // normalised the same way as the synonyms; otherwise a key such
            // as "word " (space before the tab) could never be looked up.
            let mut fields = line.split('\t').map(str::trim);
            let canonical = match fields.next() {
                Some(c) if !c.is_empty() => c,
                _ => continue,
            };
            let synonyms: Vec<String> = fields
                .filter(|s| !s.is_empty())
                .map(String::from)
                .collect();
            // Collect first and check here so that a line without usable
            // synonyms never creates an empty entry in the map.
            if synonyms.is_empty() {
                continue;
            }
            map.entry(String::from(canonical))
                .or_default()
                .extend(synonyms);
        }
        SynonymMap(map)
    }

    /// Returns the synonyms recorded for `word`, or `None` when `word` is not
    /// a canonical key in this map.
    pub fn expand(&self, word: &str) -> Option<&[String]> {
        self.0.get(word).map(Vec::as_slice)
    }

    /// Returns `true` when `word` is a canonical key in this map.
    #[inline]
    pub fn has_synonyms(&self, word: &str) -> bool {
        self.0.contains_key(word)
    }

    /// Returns the number of canonical words in the map.
    #[inline]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` when the map holds no canonical words.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created map has no entries and expands nothing.
    #[test]
    fn empty_map_returns_none() {
        let map = SynonymMap::empty();
        assert!(map.is_empty());
        assert!(map.expand("คอม").is_none());
    }

    /// One canonical word followed by one synonym yields a one-element slice.
    #[test]
    fn single_synonym_parsed() {
        let map = SynonymMap::from_tsv("คอม\tคอมพิวเตอร์\n");
        let found = map.expand("คอม").expect("should have synonyms");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].as_str(), "คอมพิวเตอร์");
    }

    /// Every tab-separated field after the first becomes a synonym.
    #[test]
    fn multiple_synonyms_parsed() {
        let map = SynonymMap::from_tsv("รถไฟฟ้า\tBTS\tMRT\tรถไฟใต้ดิน\n");
        let found = map.expand("รถไฟฟ้า").expect("should have synonyms");
        assert_eq!(found.len(), 3);
        for expected in ["BTS", "MRT", "รถไฟใต้ดิน"].iter() {
            assert!(found.iter().any(|s| s.as_str() == *expected));
        }
    }

    /// Lines starting with `#` do not produce entries.
    #[test]
    fn comment_lines_skipped() {
        let input = "# this is a comment\nคอม\tคอมพิวเตอร์\n";
        assert_eq!(SynonymMap::from_tsv(input).len(), 1);
    }

    /// Empty lines around the data are ignored.
    #[test]
    fn blank_lines_skipped() {
        let input = "\n\nคอม\tคอมพิวเตอร์\n\n";
        assert_eq!(SynonymMap::from_tsv(input).len(), 1);
    }

    /// A line that has no tab carries no synonyms and is dropped.
    #[test]
    fn line_without_tab_skipped() {
        let map = SynonymMap::from_tsv("คอม\n");
        assert!(map.expand("คอม").is_none());
    }

    /// Words that were never loaded are reported as absent by both lookups.
    #[test]
    fn unknown_word_returns_none() {
        let map = SynonymMap::from_tsv("คอม\tคอมพิวเตอร์\n");
        assert!(!map.has_synonyms("xyz"));
        assert!(map.expand("xyz").is_none());
    }

    /// A loaded canonical word is reported as present.
    #[test]
    fn has_synonyms_true_for_known_word() {
        let map = SynonymMap::from_tsv("คอม\tคอมพิวเตอร์\n");
        assert!(map.has_synonyms("คอม"));
    }

    /// Repeating a canonical word on a later line adds to its synonym list.
    #[test]
    fn duplicate_canonical_merges_synonyms() {
        let map = SynonymMap::from_tsv("คอม\tคอมพิวเตอร์\nคอม\tcomputer\n");
        let found = map.expand("คอม").expect("should have synonyms");
        for expected in ["คอมพิวเตอร์", "computer"].iter() {
            assert!(found.iter().any(|s| s.as_str() == *expected));
        }
    }

    /// Parsing the empty string gives an empty map.
    #[test]
    fn empty_input_produces_empty_map() {
        let map = SynonymMap::from_tsv("");
        assert!(map.is_empty());
    }

    /// Spaces surrounding a synonym are removed before it is stored.
    #[test]
    fn whitespace_trimmed_from_synonyms() {
        let map = SynonymMap::from_tsv("คอม\t คอมพิวเตอร์ \n");
        let found = map.expand("คอม").expect("should have synonyms");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].as_str(), "คอมพิวเตอร์");
    }
}