text_processing_rs/taggers/
punctuation.rs1use lazy_static::lazy_static;
10
11lazy_static! {
12 static ref PUNCTUATION: Vec<(&'static str, &'static str)> = vec![
15 ("exclamation point", "!"),
17 ("exclamation mark", "!"),
18 ("question mark", "?"),
19 ("open parenthesis", "("),
20 ("close parenthesis", ")"),
21 ("left parenthesis", "("),
22 ("right parenthesis", ")"),
23 ("open bracket", "["),
24 ("close bracket", "]"),
25 ("left bracket", "["),
26 ("right bracket", "]"),
27 ("open brace", "{"),
28 ("close brace", "}"),
29 ("left brace", "{"),
30 ("right brace", "}"),
31 ("double quote", "\""),
32 ("single quote", "'"),
33 ("forward slash", "/"),
34 ("back slash", "\\"),
35
36 ("period", "."),
38 ("dot", "."),
39 ("comma", ","),
40 ("colon", ":"),
41 ("semicolon", ";"),
42 ("hyphen", "-"),
43 ("dash", "-"),
44 ("ellipsis", "..."),
45 ("ampersand", "&"),
46 ("asterisk", "*"),
47 ("at sign", "@"),
48 ("hash", "#"),
49 ("percent", "%"),
50 ("plus", "+"),
51 ("equals", "="),
52 ("tilde", "~"),
53 ("underscore", "_"),
54 ("pipe", "|"),
55 ("slash", "/"),
56 ];
57}
58
59pub fn parse(input: &str) -> Option<String> {
64 let input_lower = input.to_lowercase();
65 let input_trimmed = input_lower.trim();
66
67 for (pattern, symbol) in PUNCTUATION.iter() {
68 if input_trimmed == *pattern {
69 return Some(symbol.to_string());
70 }
71 }
72
73 None
74}
75
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(spoken, symbol)` pair parses to exactly that symbol.
    fn assert_all_parse(cases: &[(&str, &str)]) {
        for &(spoken, symbol) in cases {
            assert_eq!(
                parse(spoken).as_deref(),
                Some(symbol),
                "input: {:?}",
                spoken
            );
        }
    }

    #[test]
    fn test_basic_punctuation() {
        assert_all_parse(&[
            ("period", "."),
            ("comma", ","),
            ("colon", ":"),
            ("semicolon", ";"),
        ]);
    }

    #[test]
    fn test_multi_word() {
        assert_all_parse(&[
            ("question mark", "?"),
            ("exclamation point", "!"),
            ("exclamation mark", "!"),
            ("open parenthesis", "("),
            ("close parenthesis", ")"),
            ("double quote", "\""),
            ("forward slash", "/"),
        ]);
    }

    #[test]
    fn test_case_insensitive() {
        // Mixed and upper case inputs must resolve like their lowercase forms.
        assert_all_parse(&[("Period", "."), ("COMMA", ","), ("Question Mark", "?")]);
    }

    #[test]
    fn test_symbols() {
        assert_all_parse(&[
            ("hyphen", "-"),
            ("dash", "-"),
            ("ampersand", "&"),
            ("asterisk", "*"),
            ("hash", "#"),
            ("percent", "%"),
            ("at sign", "@"),
            ("ellipsis", "..."),
        ]);
    }

    #[test]
    fn test_no_match() {
        // Unknown words, names embedded in a phrase, and empty input all miss.
        for rejected in ["hello", "the period was great", ""] {
            assert_eq!(parse(rejected), None, "input: {:?}", rejected);
        }
    }
}