harper_core/parsers/
collapse_identifiers.rs1use std::collections::VecDeque;
2use std::sync::Arc;
3
4use itertools::Itertools;
5
6use super::Parser;
7use crate::expr::{ExprExt, SequenceExpr};
8use crate::spell::Dictionary;
9use crate::{Lrc, Span, Token, TokenKind, VecExt};
10
11pub struct CollapseIdentifiers {
14 inner: Box<dyn Parser>,
15 dict: Arc<dyn Dictionary>,
16}
17
18impl CollapseIdentifiers {
19 pub fn new(inner: Box<dyn Parser>, dict: Box<Arc<dyn Dictionary>>) -> Self {
20 Self {
21 inner,
22 dict: *dict.clone(),
23 }
24 }
25}
26
27thread_local! {
28 static WORD_OR_NUMBER: Lrc<SequenceExpr> = Lrc::new(SequenceExpr::default()
29 .then_any_word()
30 .then_one_or_more(SequenceExpr::default()
31 .then_case_separator()
32 .then_any_word()));
33}
34
35impl Parser for CollapseIdentifiers {
36 fn parse(&self, source: &[char]) -> Vec<Token> {
37 let mut tokens = self.inner.parse(source);
38
39 let mut to_remove = VecDeque::default();
40
41 for tok_span in WORD_OR_NUMBER
42 .with(|v| v.clone())
43 .iter_matches(&tokens, source)
44 .collect::<Vec<_>>()
45 {
46 let start_tok = &tokens[tok_span.start];
47 let end_tok = &tokens[tok_span.end - 1];
48 let char_span = Span::new(start_tok.span.start, end_tok.span.end);
49
50 if self.dict.contains_word(char_span.get_content(source)) {
51 tokens[tok_span.start] = Token::new(char_span, TokenKind::blank_word());
52 to_remove.extend(tok_span.start + 1..tok_span.end);
53 }
54 }
55
56 tokens.remove_indices(to_remove.into_iter().sorted().unique().collect());
57
58 tokens
59 }
60}
61
#[cfg(test)]
mod tests {
    use super::*;
    use crate::spell::{FstDictionary, MergedDictionary, MutableDictionary};
    use crate::{
        DictWordMetadata,
        parsers::{PlainEnglish, StrParser},
    };

    /// Number of tokens produced for `source` by a `CollapseIdentifiers` that
    /// wraps `PlainEnglish` and looks words up in `dict`.
    fn token_count(source: &str, dict: Arc<dyn Dictionary>) -> usize {
        CollapseIdentifiers::new(Box::new(PlainEnglish), Box::new(dict))
            .parse_str(source)
            .len()
    }

    /// Builds a merged dictionary containing `base` plus a fresh mutable
    /// dictionary holding each of `words` with default metadata.
    fn with_extra_words(base: Arc<dyn Dictionary>, words: &[&str]) -> Arc<dyn Dictionary> {
        let mut extra = MutableDictionary::new();
        for &word in words {
            extra.append_word_str(word, DictWordMetadata::default());
        }

        let mut merged = MergedDictionary::new();
        merged.add_dictionary(base);
        merged.add_dictionary(Arc::new(extra));

        Arc::new(merged)
    }

    #[test]
    fn matches_kebab() {
        let source: Vec<char> = "kebab-case".chars().collect();
        let tokens = PlainEnglish.parse(&source);

        let match_count = WORD_OR_NUMBER
            .with(|expr| expr.clone())
            .iter_matches(&tokens, &source)
            .count();

        assert_eq!(match_count, 1);
    }

    #[test]
    fn no_collapse() {
        let curated = FstDictionary::curated();

        assert_eq!(token_count("This is a test.", curated), 8);
    }

    #[test]
    fn one_collapse() {
        let source = "This is a separated_identifier, wow!";
        let curated = FstDictionary::curated();

        // Unknown identifier: nothing is merged.
        assert_eq!(token_count(source, curated.clone()), 13);

        let extended = with_extra_words(curated, &["separated_identifier"]);
        assert_eq!(token_count(source, extended), 11);
    }

    #[test]
    fn kebab_collapse() {
        let source = "This is a separated-identifier, wow!";
        let curated = FstDictionary::curated();

        assert_eq!(token_count(source, curated.clone()), 13);

        let extended = with_extra_words(curated, &["separated-identifier"]);
        assert_eq!(token_count(source, extended), 11);
    }

    #[test]
    fn double_collapse() {
        let source = "This is a separated_identifier_token, wow!";
        let curated = FstDictionary::curated();

        assert_eq!(token_count(source, curated.clone()), 15);

        let extended = with_extra_words(curated, &["separated_identifier_token"]);
        assert_eq!(token_count(source, extended), 11);
    }

    #[test]
    fn two_collapses() {
        let source = "This is a separated_identifier, wow! separated_identifier";
        let curated = FstDictionary::curated();

        assert_eq!(token_count(source, curated.clone()), 17);

        let extended = with_extra_words(curated, &["separated_identifier"]);
        assert_eq!(token_count(source, extended), 13);
    }

    #[test]
    fn overlapping_identifiers() {
        let source = "This is a separated_identifier_token, wow!";
        let curated = FstDictionary::curated();

        assert_eq!(token_count(source, curated.clone()), 15);

        // Only the two overlapping halves are known, not the full identifier,
        // so the token count is expected to stay the same.
        let extended = with_extra_words(curated, &["separated_identifier", "identifier_token"]);
        assert_eq!(token_count(source, extended), 15);
    }

    #[test]
    fn nested_identifiers() {
        let source = "This is a separated_identifier_token, wow!";
        let curated = FstDictionary::curated();

        assert_eq!(token_count(source, curated.clone()), 15);

        // Both the full identifier and a prefix of it are known.
        let extended = with_extra_words(
            curated,
            &["separated_identifier_token", "separated_identifier"],
        );
        assert_eq!(token_count(source, extended), 11);
    }
}