// harper_core/parsers/collapse_identifiers.rs
use std::collections::VecDeque;
use std::sync::Arc;

use itertools::Itertools;

use super::Parser;
use crate::expr::{ExprExt, SequenceExpr};
use crate::spell::Dictionary;
use crate::{Lrc, Span, Token, TokenKind, VecExt};
10
11pub struct CollapseIdentifiers {
14 inner: Box<dyn Parser>,
15 dict: Arc<dyn Dictionary>,
16}
17
18impl CollapseIdentifiers {
19 pub fn new(inner: Box<dyn Parser>, dict: Box<Arc<dyn Dictionary>>) -> Self {
20 Self {
21 inner,
22 dict: *dict.clone(),
23 }
24 }
25}
26
27thread_local! {
28 static WORD_OR_NUMBER: Lrc<SequenceExpr> = Lrc::new(SequenceExpr::any_word()
29 .then_one_or_more(SequenceExpr::default()
30 .then_case_separator()
31 .then_any_word()));
32}
33
34impl Parser for CollapseIdentifiers {
35 fn parse(&self, source: &[char]) -> Vec<Token> {
36 let mut tokens = self.inner.parse(source);
37
38 let mut to_remove = VecDeque::default();
39
40 for tok_span in WORD_OR_NUMBER
41 .with(|v| v.clone())
42 .iter_matches(&tokens, source)
43 .collect::<Vec<_>>()
44 {
45 let start_tok = &tokens[tok_span.start];
46 let end_tok = &tokens[tok_span.end - 1];
47 let char_span = Span::new(start_tok.span.start, end_tok.span.end);
48
49 if self.dict.contains_word(char_span.get_content(source)) {
50 tokens[tok_span.start] = Token::new(char_span, TokenKind::blank_word());
51 to_remove.extend(tok_span.start + 1..tok_span.end);
52 }
53 }
54
55 tokens.remove_indices(to_remove.into_iter().sorted().unique().collect());
56
57 tokens
58 }
59}
60
#[cfg(test)]
mod tests {
    use super::*;
    use crate::spell::{FstDictionary, MergedDictionary, MutableDictionary};
    use crate::{
        DictWordMetadata,
        parsers::{PlainEnglish, StrParser},
    };

    /// Parses `source` twice and checks the token count each time.
    ///
    /// The first pass uses only the curated dictionary and must yield `plain_len` tokens. The
    /// second pass adds `extra_words` on top of the curated dictionary and must yield
    /// `collapsed_len` tokens.
    #[track_caller]
    fn assert_token_counts(
        source: &str,
        extra_words: &[&str],
        plain_len: usize,
        collapsed_len: usize,
    ) {
        let curated_dictionary = FstDictionary::curated();

        let tokens =
            CollapseIdentifiers::new(Box::new(PlainEnglish), Box::new(curated_dictionary.clone()))
                .parse_str(source);
        assert_eq!(tokens.len(), plain_len);

        let mut dict = MutableDictionary::new();
        for &word in extra_words {
            dict.append_word_str(word, DictWordMetadata::default());
        }

        let mut merged_dict = MergedDictionary::new();
        merged_dict.add_dictionary(curated_dictionary);
        merged_dict.add_dictionary(Arc::new(dict));

        let tokens =
            CollapseIdentifiers::new(Box::new(PlainEnglish), Box::new(Arc::new(merged_dict)))
                .parse_str(source);
        assert_eq!(tokens.len(), collapsed_len);
    }

    /// The raw pattern matches a hyphen-joined pair of words exactly once.
    #[test]
    fn matches_kebab() {
        let source: Vec<_> = "kebab-case".chars().collect();

        let pattern = WORD_OR_NUMBER.with(|expr| Lrc::clone(expr));
        let tokens = PlainEnglish.parse(&source);
        let match_count = pattern.iter_matches(&tokens, &source).count();

        assert_eq!(match_count, 1);
    }

    /// Ordinary prose keeps the token count checked here.
    #[test]
    fn no_collapse() {
        let dict = FstDictionary::curated();
        let source = "This is a test.";

        let parser = CollapseIdentifiers::new(Box::new(PlainEnglish), Box::new(dict));
        let tokens = parser.parse_str(source);

        assert_eq!(tokens.len(), 8);
    }

    /// A snake_case identifier collapses once it is added to the dictionary.
    #[test]
    fn one_collapse() {
        assert_token_counts(
            "This is a separated_identifier, wow!",
            &["separated_identifier"],
            13,
            11,
        );
    }

    /// A kebab-case identifier collapses once it is added to the dictionary.
    #[test]
    fn kebab_collapse() {
        assert_token_counts(
            "This is a separated-identifier, wow!",
            &["separated-identifier"],
            13,
            11,
        );
    }

    /// A three-part identifier collapses into a single token.
    #[test]
    fn double_collapse() {
        assert_token_counts(
            "This is a separated_identifier_token, wow!",
            &["separated_identifier_token"],
            15,
            11,
        );
    }

    /// Two separate occurrences of the same identifier both collapse.
    #[test]
    fn two_collapses() {
        assert_token_counts(
            "This is a separated_identifier, wow! separated_identifier",
            &["separated_identifier"],
            17,
            13,
        );
    }

    /// Two dictionary entries that each cover only part of the run leave the tokens untouched.
    #[test]
    fn overlapping_identifiers() {
        assert_token_counts(
            "This is a separated_identifier_token, wow!",
            &["separated_identifier", "identifier_token"],
            15,
            15,
        );
    }

    /// When both the full run and a prefix of it are known, the token count matches a single
    /// collapse of the full run.
    #[test]
    fn nested_identifiers() {
        assert_token_counts(
            "This is a separated_identifier_token, wow!",
            &["separated_identifier_token", "separated_identifier"],
            15,
            11,
        );
    }
}