harper_core/parsers/
collapse_identifiers.rs

1use std::collections::VecDeque;
2use std::sync::Arc;
3
4use itertools::Itertools;
5
6use super::Parser;
7use crate::expr::{ExprExt, SequenceExpr};
8use crate::spell::Dictionary;
9use crate::{Lrc, Span, Token, TokenKind, VecExt};
10
11/// A parser that wraps any other parser to collapse token strings that match
12/// the pattern `word_word` or `word-word`.
13pub struct CollapseIdentifiers {
14    inner: Box<dyn Parser>,
15    dict: Arc<dyn Dictionary>,
16}
17
18impl CollapseIdentifiers {
19    pub fn new(inner: Box<dyn Parser>, dict: Box<Arc<dyn Dictionary>>) -> Self {
20        Self {
21            inner,
22            dict: *dict.clone(),
23        }
24    }
25}
26
27thread_local! {
28    static WORD_OR_NUMBER: Lrc<SequenceExpr> = Lrc::new(SequenceExpr::default()
29                .then_any_word()
30                .then_one_or_more(SequenceExpr::default()
31        .then_case_separator()
32        .then_any_word()));
33}
34
35impl Parser for CollapseIdentifiers {
36    fn parse(&self, source: &[char]) -> Vec<Token> {
37        let mut tokens = self.inner.parse(source);
38
39        let mut to_remove = VecDeque::default();
40
41        for tok_span in WORD_OR_NUMBER
42            .with(|v| v.clone())
43            .iter_matches(&tokens, source)
44            .collect::<Vec<_>>()
45        {
46            let start_tok = &tokens[tok_span.start];
47            let end_tok = &tokens[tok_span.end - 1];
48            let char_span = Span::new(start_tok.span.start, end_tok.span.end);
49
50            if self.dict.contains_word(char_span.get_content(source)) {
51                tokens[tok_span.start] = Token::new(char_span, TokenKind::blank_word());
52                to_remove.extend(tok_span.start + 1..tok_span.end);
53            }
54        }
55
56        tokens.remove_indices(to_remove.into_iter().sorted().unique().collect());
57
58        tokens
59    }
60}
61
#[cfg(test)]
mod tests {
    use super::*;
    use crate::spell::{FstDictionary, MergedDictionary, MutableDictionary};
    use crate::{
        DictWordMetadata,
        parsers::{PlainEnglish, StrParser},
    };

    /// Number of tokens produced when `source` is parsed by a
    /// `CollapseIdentifiers` wrapping `PlainEnglish` and backed by `dict`.
    fn token_count(source: &str, dict: Arc<dyn Dictionary>) -> usize {
        CollapseIdentifiers::new(Box::new(PlainEnglish), Box::new(dict))
            .parse_str(source)
            .len()
    }

    /// Token count when only the curated dictionary is available.
    fn curated_token_count(source: &str) -> usize {
        token_count(source, FstDictionary::curated())
    }

    /// Token count when the curated dictionary is merged with a dictionary
    /// containing `extra_words` (appended in the given order).
    fn extended_token_count(source: &str, extra_words: &[&str]) -> usize {
        let mut extra = MutableDictionary::new();
        for &word in extra_words {
            extra.append_word_str(word, DictWordMetadata::default());
        }

        let mut merged = MergedDictionary::new();
        merged.add_dictionary(FstDictionary::curated());
        merged.add_dictionary(Arc::new(extra));

        token_count(source, Arc::new(merged))
    }

    #[test]
    fn matches_kebab() {
        let chars: Vec<char> = "kebab-case".chars().collect();
        let tokens = PlainEnglish.parse(&chars);

        let match_count = WORD_OR_NUMBER
            .with(|expr| expr.clone())
            .iter_matches(&tokens, &chars)
            .count();

        assert_eq!(match_count, 1);
    }

    #[test]
    fn no_collapse() {
        assert_eq!(curated_token_count("This is a test."), 8);
    }

    #[test]
    fn one_collapse() {
        let source = "This is a separated_identifier, wow!";

        // Unknown identifier: the tokens are left as the inner parser made them.
        assert_eq!(curated_token_count(source), 13);
        // Known identifier: its three tokens become one.
        assert_eq!(extended_token_count(source, &["separated_identifier"]), 11);
    }

    #[test]
    fn kebab_collapse() {
        let source = "This is a separated-identifier, wow!";

        assert_eq!(curated_token_count(source), 13);
        assert_eq!(extended_token_count(source, &["separated-identifier"]), 11);
    }

    #[test]
    fn double_collapse() {
        let source = "This is a separated_identifier_token, wow!";

        assert_eq!(curated_token_count(source), 15);
        assert_eq!(
            extended_token_count(source, &["separated_identifier_token"]),
            11
        );
    }

    #[test]
    fn two_collapses() {
        let source = "This is a separated_identifier, wow! separated_identifier";

        assert_eq!(curated_token_count(source), 17);
        assert_eq!(extended_token_count(source, &["separated_identifier"]), 13);
    }

    #[test]
    fn overlapping_identifiers() {
        let source = "This is a separated_identifier_token, wow!";

        assert_eq!(curated_token_count(source), 15);
        // Only the two overlapping halves are known, not the full identifier,
        // so nothing is expected to collapse.
        assert_eq!(
            extended_token_count(source, &["separated_identifier", "identifier_token"]),
            15
        );
    }

    #[test]
    fn nested_identifiers() {
        let source = "This is a separated_identifier_token, wow!";

        assert_eq!(curated_token_count(source), 15);
        // Both the full identifier and a prefix of it are known; the expected
        // count corresponds to the full identifier collapsing into one token.
        assert_eq!(
            extended_token_count(
                source,
                &["separated_identifier_token", "separated_identifier"]
            ),
            11
        );
    }
}