1use std::collections::HashMap;
34use std::fs;
35use std::path::Path;
36
/// Loads an emoji data file and maps each emoji string to a short label.
///
/// Every line is trimmed; blank lines and lines starting with `#` are
/// ignored. For the remaining lines:
///
/// * the text before the first `#` is the data part, and its first
///   `;`-separated field is read as whitespace-separated hexadecimal code
///   points that are concatenated into the emoji string (the map key);
/// * the text after the first `#` is the comment part. Whatever follows its
///   first `)` (or the whole comment if there is no `)`) is lowercased and
///   its whitespace-separated words are joined with `-` to form the label
///   (the map value). A line without a comment gets an empty label.
///
/// A line is skipped when its code-point field is empty, or when *any* of
/// its tokens is not valid hexadecimal or not a valid Unicode scalar value
/// (for example a surrogate or a value above `10FFFF`). Decoding is
/// all-or-nothing so a damaged sequence never yields a truncated emoji key.
///
/// If the same emoji string appears on several lines, the last line wins.
///
/// # Errors
///
/// Returns the `std::io::Error` produced by `fs::read_to_string` when the
/// file cannot be read.
pub fn load_emoji_sequences<P: AsRef<Path>>(
    filepath: P,
) -> Result<HashMap<String, String>, std::io::Error> {
    let contents = fs::read_to_string(filepath)?;

    let mut map = HashMap::new();

    for raw_line in contents.lines() {
        let line = raw_line.trim();

        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        let (data_part, comment_part) = match line.split_once('#') {
            Some((before, after)) => (before.trim(), after.trim()),
            None => (line, ""),
        };

        // `split` always yields at least one item, so the fallback is never
        // hit in practice; it only avoids an `unwrap`.
        let hex_seq = data_part.split(';').next().unwrap_or("").trim();

        let emoji_string = match decode_hex_sequence(hex_seq) {
            // An empty field decodes to an empty string, which is not a usable key.
            Some(decoded) if !decoded.is_empty() => decoded,
            _ => continue,
        };

        let _ = map.insert(emoji_string, short_label_from_comment(comment_part));
    }

    Ok(map)
}

/// Decodes whitespace-separated hexadecimal code points into a string.
///
/// Returns `None` as soon as one token is not valid hexadecimal or does not
/// name a Unicode scalar value; an input without tokens yields `Some("")`.
fn decode_hex_sequence(hex_seq: &str) -> Option<String> {
    hex_seq
        .split_whitespace()
        .map(|hex| u32::from_str_radix(hex, 16).ok().and_then(char::from_u32))
        .collect()
}

/// Builds the lowercase, hyphen-joined label from a line's comment text.
///
/// Only the text after the first `)` is used; without a `)` the whole
/// comment is used.
fn short_label_from_comment(comment: &str) -> String {
    let after_paren = match comment.find(')') {
        // `)` is a single byte, so `idx + 1` is always a char boundary.
        Some(idx) => &comment[idx + 1..],
        None => comment,
    };

    after_paren
        .to_lowercase()
        .split_whitespace()
        .collect::<Vec<_>>()
        .join("-")
}
129
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file holding `content` and returns its handle.
    fn create_temp_file(content: &str) -> NamedTempFile {
        let mut tmp =
            NamedTempFile::new().expect("Failed to create temporary file");
        tmp.write_all(content.as_bytes())
            .expect("Failed to write to temporary file");
        tmp
    }

    /// Writes `content` to a temporary file and parses it, panicking on I/O errors.
    fn parse_text(content: &str) -> HashMap<String, String> {
        let tmp = create_temp_file(content);
        load_emoji_sequences(tmp.path()).unwrap()
    }

    /// Builds the expected map from `(emoji, label)` pairs.
    fn expected_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(emoji, label)| (emoji.to_string(), label.to_string()))
            .collect()
    }

    // Two well-formed lines produce two entries.
    #[test]
    fn test_load_emoji_sequences_basic() {
        let test_data = r#"
            26A1 ; emoji ; L1 ; none ; a j # V4.0 (⚡) HIGH VOLTAGE SIGN
            1F600 ; emoji ; L1 ; none ; j # V6.0 (😀) GRINNING FACE
        "#;

        let wanted = expected_map(&[
            ("⚡", "high-voltage-sign"),
            ("😀", "grinning-face"),
        ]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // An empty file yields an empty map rather than an error.
    #[test]
    fn test_load_emoji_sequences_empty_file() {
        let parsed = parse_text("");

        assert!(parsed.is_empty());
    }

    // Comment-only and blank lines are ignored.
    #[test]
    fn test_load_emoji_sequences_with_comments_and_blanks() {
        let test_data = r#"
            # This is a comment

            1F44D ; emoji ; L1 ; none ; j # V6.0 (👍) THUMBS UP SIGN

            # Another comment here

"#;

        let wanted = expected_map(&[("👍", "thumbs-up-sign")]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // A line without any comment gets an empty label.
    #[test]
    fn test_load_emoji_sequences_no_comment_label() {
        let test_data = r#"
            1F4AF ; emoji ; L1 ; none ; j # V6.0 (💯) HUNDRED POINTS SYMBOL
            1F602 ; emoji ; L1 ; none ; j
"#;

        let wanted = expected_map(&[
            ("💯", "hundred-points-symbol"),
            ("😂", ""),
        ]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // A line whose code-point field is not hexadecimal is dropped.
    #[test]
    fn test_load_emoji_sequences_invalid_hex_code() {
        let test_data = r#"
            26A1 ; emoji ; L1 ; none ; a j # V4.0 (⚡) HIGH VOLTAGE SIGN
            INVALID_HEX ; emoji ; L1 ; none ; j # Invalid hex code
"#;

        let wanted = expected_map(&[("⚡", "high-voltage-sign")]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // Several code points on one line are concatenated into one key.
    #[test]
    fn test_load_emoji_sequences_multi_codepoint() {
        let test_data = r#"
            1F1E6 1F1FA ; emoji ; L1 ; none ; j # V6.0 (🇦🇺) FLAG FOR AUSTRALIA
"#;

        let wanted = expected_map(&[("🇦🇺", "flag-for-australia")]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // A missing comment and a bare `#` both give an empty label.
    #[test]
    fn test_load_emoji_sequences_missing_label() {
        let test_data = r#"
            1F44D ; emoji ; L1 ; none ; j # V6.0 (👍) THUMBS UP SIGN
            1F602 ; emoji ; L1 ; none ; j
            1F600 ; emoji ; L1 ; none ; j #
"#;

        let wanted = expected_map(&[
            ("👍", "thumbs-up-sign"),
            ("😂", ""),
            ("😀", ""),
        ]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // Blank lines around the data do not affect the result.
    #[test]
    fn test_load_emoji_sequences_handles_empty_and_whitespace() {
        let test_data = r#"

            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY

        "#;

        let wanted = expected_map(&[("😂", "face-with-tears-of-joy")]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // Surrounding whitespace on a data line is trimmed away.
    #[test]
    fn test_load_emoji_sequences_handles_trailing_whitespace() {
        let test_data = r#"
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
        "#;

        let wanted = expected_map(&[("😂", "face-with-tears-of-joy")]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // A line with an empty first field is skipped.
    #[test]
    fn test_load_emoji_sequences_skip_invalid_lines() {
        let test_data = r#"
            # Comment line
            ; invalid line ; no hex code ; # Just semicolons
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
        "#;

        let wanted = expected_map(&[("😂", "face-with-tears-of-joy")]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // Compact and spaced separators parse alike; duplicate keys collapse.
    #[test]
    fn test_load_emoji_sequences_split_behavior() {
        let test_data = r#"
            26A1;emoji;L1;none;a j# V4.0 (⚡) HIGH VOLTAGE SIGN
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
            26A1 ; emoji ; L1 ; none ; a j # V4.0 (⚡) HIGH VOLTAGE SIGN
        "#;

        let wanted = expected_map(&[
            ("⚡", "high-voltage-sign"),
            ("😂", "face-with-tears-of-joy"),
        ]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // The label is whatever follows the first `)` in the comment.
    #[test]
    fn test_load_emoji_sequences_parenthesis_variations() {
        let test_data = r#"
            26A1 ; emoji ; L1 ; none ; a j # (⚡) HIGH VOLTAGE
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS
            1F603 ; emoji ; L1 ; none ; j # V6.0 (😃) SMILEY FACE
            1F604 ; emoji ; L1 ; none ; j # V6.0 (😄) GRINNING FACE
        "#;

        let wanted = expected_map(&[
            ("⚡", "high-voltage"),
            ("😂", "face-with-tears"),
            ("😃", "smiley-face"),
            ("😄", "grinning-face"),
        ]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // Out-of-range values and surrogates cannot become chars, so those lines vanish.
    #[test]
    fn test_load_emoji_sequences_unparseable_sequences() {
        let test_data = r#"
            110000 ; emoji ; L1 ; none ; j # Above Unicode range INVALID
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
            D800 ; emoji ; L1 ; none ; j # Surrogate code point
        "#;

        let wanted = expected_map(&[("😂", "face-with-tears-of-joy")]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // Lines made only of separators, or only of `#`, contribute nothing.
    #[test]
    fn test_load_emoji_sequences_empty_fields() {
        let test_data = r#"
            ; ; ; ; ; # Empty fields should be skipped
            1F602 ; emoji ; L1 ; none ; j # V6.0 (😂) FACE WITH TEARS OF JOY
            #
        "#;

        let wanted = expected_map(&[("😂", "face-with-tears-of-joy")]);

        assert_eq!(parse_text(test_data), wanted);
    }

    // Spacing around `;` and `#` is irrelevant to the parse.
    #[test]
    fn test_load_emoji_sequences_whitespace_variations() {
        let test_data = r#"
            1F602;emoji;L1;none;j# V6.0 (😂) FACE WITH TEARS OF JOY
            1F603 ; emoji ; L1 ; none ; j # V6.0 (😃) SMILEY FACE
        "#;

        let wanted = expected_map(&[
            ("😂", "face-with-tears-of-joy"),
            ("😃", "smiley-face"),
        ]);

        assert_eq!(parse_text(test_data), wanted);
    }
}