Skip to main content

harper_core/parsers/
collapse_identifiers.rs

1use std::collections::VecDeque;
2use std::sync::Arc;
3
4use itertools::Itertools;
5
6use super::Parser;
7use crate::expr::{ExprExt, SequenceExpr};
8use crate::spell::Dictionary;
9use crate::{Lrc, Span, Token, TokenKind, VecExt};
10
11/// A parser that wraps any other parser to collapse token strings that match
12/// the pattern `word_word` or `word-word`.
13pub struct CollapseIdentifiers {
14    inner: Box<dyn Parser>,
15    dict: Arc<dyn Dictionary>,
16}
17
18impl CollapseIdentifiers {
19    pub fn new(inner: Box<dyn Parser>, dict: Box<Arc<dyn Dictionary>>) -> Self {
20        Self {
21            inner,
22            dict: *dict.clone(),
23        }
24    }
25}
26
27thread_local! {
28    static WORD_OR_NUMBER: Lrc<SequenceExpr> = Lrc::new(SequenceExpr::any_word()
29                .then_one_or_more(SequenceExpr::default()
30        .then_case_separator()
31        .then_any_word()));
32}
33
34impl Parser for CollapseIdentifiers {
35    fn parse(&self, source: &[char]) -> Vec<Token> {
36        let mut tokens = self.inner.parse(source);
37
38        let mut to_remove = VecDeque::default();
39
40        for tok_span in WORD_OR_NUMBER
41            .with(|v| v.clone())
42            .iter_matches(&tokens, source)
43            .collect::<Vec<_>>()
44        {
45            let start_tok = &tokens[tok_span.start];
46            let end_tok = &tokens[tok_span.end - 1];
47            let char_span = Span::new(start_tok.span.start, end_tok.span.end);
48
49            if self.dict.contains_word(char_span.get_content(source)) {
50                tokens[tok_span.start] = Token::new(char_span, TokenKind::blank_word());
51                to_remove.extend(tok_span.start + 1..tok_span.end);
52            }
53        }
54
55        tokens.remove_indices(to_remove.into_iter().sorted().unique().collect());
56
57        tokens
58    }
59}
60
#[cfg(test)]
mod tests {
    use super::*;
    use crate::spell::{FstDictionary, MergedDictionary, MutableDictionary};
    use crate::{
        DictWordMetadata,
        parsers::{PlainEnglish, StrParser},
    };

    /// Number of tokens produced for `source` when `CollapseIdentifiers` wraps
    /// `PlainEnglish` and is backed by the curated dictionary alone.
    fn count_with_curated(source: &str) -> usize {
        CollapseIdentifiers::new(Box::new(PlainEnglish), Box::new(FstDictionary::curated()))
            .parse_str(source)
            .len()
    }

    /// Number of tokens produced for `source` when the curated dictionary is
    /// merged with a dictionary holding `extra_words` (added in the given order).
    fn count_with_extra_words(source: &str, extra_words: &[&str]) -> usize {
        let mut extra = MutableDictionary::new();
        for &word in extra_words {
            extra.append_word_str(word, DictWordMetadata::default());
        }

        let mut merged = MergedDictionary::new();
        merged.add_dictionary(FstDictionary::curated());
        merged.add_dictionary(Arc::new(extra));

        CollapseIdentifiers::new(Box::new(PlainEnglish), Box::new(Arc::new(merged)))
            .parse_str(source)
            .len()
    }

    #[test]
    fn matches_kebab() {
        let source: Vec<_> = "kebab-case".chars().collect();
        let tokens = PlainEnglish.parse(&source);
        let pattern = WORD_OR_NUMBER.with(|v| v.clone());

        assert_eq!(pattern.iter_matches(&tokens, &source).count(), 1);
    }

    #[test]
    fn no_collapse() {
        assert_eq!(count_with_curated("This is a test."), 8);
    }

    #[test]
    fn one_collapse() {
        let source = "This is a separated_identifier, wow!";

        assert_eq!(count_with_curated(source), 13);
        assert_eq!(
            count_with_extra_words(source, &["separated_identifier"]),
            11
        );
    }

    #[test]
    fn kebab_collapse() {
        let source = "This is a separated-identifier, wow!";

        assert_eq!(count_with_curated(source), 13);
        assert_eq!(
            count_with_extra_words(source, &["separated-identifier"]),
            11
        );
    }

    #[test]
    fn double_collapse() {
        let source = "This is a separated_identifier_token, wow!";

        assert_eq!(count_with_curated(source), 15);
        assert_eq!(
            count_with_extra_words(source, &["separated_identifier_token"]),
            11
        );
    }

    #[test]
    fn two_collapses() {
        let source = "This is a separated_identifier, wow! separated_identifier";

        assert_eq!(count_with_curated(source), 17);
        assert_eq!(
            count_with_extra_words(source, &["separated_identifier"]),
            13
        );
    }

    #[test]
    fn overlapping_identifiers() {
        let source = "This is a separated_identifier_token, wow!";

        assert_eq!(count_with_curated(source), 15);
        // Neither partial identifier covers the whole matched run, so nothing
        // is collapsed.
        assert_eq!(
            count_with_extra_words(source, &["separated_identifier", "identifier_token"]),
            15
        );
    }

    #[test]
    fn nested_identifiers() {
        let source = "This is a separated_identifier_token, wow!";

        assert_eq!(count_with_curated(source), 15);
        assert_eq!(
            count_with_extra_words(
                source,
                &["separated_identifier_token", "separated_identifier"]
            ),
            11
        );
    }
}