harper_core/linting/
comma_fixes.rs

1use super::{Lint, LintKind, Linter, Suggestion};
2use crate::{
3    Span,
4    TokenKind::{Space, Unlintable, Word},
5    TokenStringExt,
6};
7
/// Message used when whitespace directly precedes a comma.
const MSG_SPACE_BEFORE: &str = "Don't use a space before a comma.";
/// Message used when an East Asian comma appears where an English comma is expected.
const MSG_AVOID_ASIAN: &str = "Avoid East Asian commas in English contexts.";
/// Message used when a comma is immediately followed by a word.
const MSG_SPACE_AFTER: &str = "Use a space after a comma.";
11
/// A linter that fixes common comma errors:
///
/// - No space after a comma.
/// - Inappropriate space before a comma.
/// - Asian commas used instead of English commas.
///
/// This linter only flags Asian commas anywhere, and wrong spacing of commas between words.
///
/// Commas between numbers are used differently in different contexts and these are not checked:
///
/// - Lists of numbers: `1, 2, 3`
/// - Thousands separators: `1,000,000`
/// - Decimal points used mistakenly by Europeans: `3,14159`
#[derive(Debug, Default)]
pub struct CommaFixes;
23
24impl Linter for CommaFixes {
25    fn lint(&mut self, document: &crate::Document) -> Vec<Lint> {
26        let mut lints = Vec::new();
27        let source = document.get_source();
28
29        for ci in document.iter_comma_indices() {
30            let mut toks = (None, None, document.get_token(ci).unwrap(), None, None);
31            toks.0 = (ci >= 2).then(|| document.get_token(ci - 2).unwrap());
32            toks.1 = (ci >= 1).then(|| document.get_token(ci - 1).unwrap());
33            toks.3 = document.get_token(ci + 1);
34            toks.4 = document.get_token(ci + 2);
35
36            let kinds = (
37                toks.0.map(|t| &t.kind),
38                toks.1.map(|t| &t.kind),
39                *toks.2.span.get_content(source).first().unwrap(),
40                toks.3.map(|t| &t.kind),
41                toks.4.map(|t| &t.kind),
42            );
43
44            let (span, suggestion, message) = match kinds {
45                (_, Some(Word(_)), '、' | ',', Some(Space(_)), Some(Word(_))) => (
46                    toks.2.span,
47                    Suggestion::ReplaceWith(vec![',']),
48                    vec![MSG_AVOID_ASIAN],
49                ),
50
51                (Some(Word(_)), Some(Space(_)), ',', Some(Space(_)), Some(Word(_))) => (
52                    toks.1.unwrap().span,
53                    Suggestion::Remove,
54                    vec![MSG_SPACE_BEFORE],
55                ),
56
57                (Some(Word(_)), Some(Space(_)), '、' | ',', Some(Space(_)), Some(Word(_))) => (
58                    Span::new(toks.1.unwrap().span.start, toks.2.span.end),
59                    Suggestion::ReplaceWith(vec![',']),
60                    vec![MSG_SPACE_BEFORE, MSG_AVOID_ASIAN],
61                ),
62
63                (_, Some(Word(_)), ',', Some(Word(_)), _) => (
64                    toks.2.span,
65                    Suggestion::InsertAfter(vec![' ']),
66                    vec![MSG_SPACE_AFTER],
67                ),
68
69                (_, Some(Word(_)), '、' | ',', Some(Word(_)), _) => (
70                    toks.2.span,
71                    Suggestion::ReplaceWith(vec![',', ' ']),
72                    vec![MSG_AVOID_ASIAN, MSG_SPACE_AFTER],
73                ),
74
75                (Some(Word(_)), Some(Space(_)), ',', Some(Word(_)), _) => (
76                    Span::new(toks.1.unwrap().span.start, toks.2.span.end),
77                    Suggestion::ReplaceWith(vec![',', ' ']),
78                    vec![MSG_SPACE_BEFORE, MSG_SPACE_AFTER],
79                ),
80
81                (Some(Word(_)), Some(Space(_)), '、' | ',', Some(Word(_)), _) => (
82                    Span::new(toks.1.unwrap().span.start, toks.2.span.end),
83                    Suggestion::ReplaceWith(vec![',', ' ']),
84                    vec![MSG_SPACE_BEFORE, MSG_AVOID_ASIAN, MSG_SPACE_AFTER],
85                ),
86
87                // Handles Asian commas in all other contexts
88                // Unlintable is used for non-English tokens to prevent changing commas in CJK text
89                (_, Some(Unlintable), '、' | ',', _, _) => continue,
90                (_, _, '、' | ',', Some(Unlintable), _) => continue,
91
92                (_, _, '、' | ',', _, _) => (
93                    toks.2.span,
94                    Suggestion::ReplaceWith(vec![',']),
95                    vec![MSG_AVOID_ASIAN],
96                ),
97
98                _ => continue,
99            };
100
101            lints.push(Lint {
102                span,
103                lint_kind: LintKind::Punctuation,
104                suggestions: vec![suggestion],
105                message: message.join(" "),
106                priority: 32,
107            });
108        }
109
110        lints
111    }
112
113    fn description(&self) -> &'static str {
114        "Fix common comma errors such as no space after, erroneous space before, etc., Asian commas instead of English commas, etc."
115    }
116}
117
#[cfg(test)]
mod tests {
    // The fullwidth comma (U+FF0C) is written as `\u{FF0C}` throughout so it cannot be mistaken
    // for, or silently normalised to, the ASCII comma.
    use super::CommaFixes;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    #[test]
    fn allows_english_comma_atomic() {
        assert_lint_count(",", CommaFixes, 0);
    }

    #[test]
    fn flags_fullwidth_comma_atomic() {
        assert_lint_count("\u{FF0C}", CommaFixes, 1);
    }

    #[test]
    fn flags_ideographic_comma_atomic() {
        assert_lint_count("、", CommaFixes, 1);
    }

    #[test]
    fn corrects_fullwidth_comma_real_world() {
        assert_suggestion_result(
            "higher 2 bits of the number of nodes\u{FF0C} whether abandoned or not decided by .index section",
            CommaFixes,
            "higher 2 bits of the number of nodes, whether abandoned or not decided by .index section",
        );
    }

    #[test]
    fn corrects_ideographic_comma_real_world() {
        assert_suggestion_result("cout、endl、string", CommaFixes, "cout, endl, string")
    }

    #[test]
    fn doesnt_flag_comma_space_between_words() {
        assert_lint_count("foo, bar", CommaFixes, 0);
    }

    #[test]
    fn flags_fullwidth_comma_space_between_words() {
        assert_lint_count("foo\u{FF0C} bar", CommaFixes, 1);
    }

    #[test]
    fn flags_ideographic_comma_space_between_words() {
        assert_lint_count("foo、 bar", CommaFixes, 1);
    }

    #[test]
    fn doesnt_flag_semicolon_space_between_words() {
        assert_lint_count("foo; bar", CommaFixes, 0);
    }

    #[test]
    fn corrects_comma_between_words_with_no_space() {
        assert_suggestion_result("foo,bar", CommaFixes, "foo, bar")
    }

    #[test]
    fn corrects_asian_comma_between_words_with_no_space() {
        assert_suggestion_result("foo\u{FF0C}bar", CommaFixes, "foo, bar")
    }

    #[test]
    fn corrects_space_on_wrong_side_of_comma_between_words() {
        assert_suggestion_result("foo ,bar", CommaFixes, "foo, bar")
    }

    #[test]
    fn corrects_comma_on_wrong_side_of_asian_comma_between_words() {
        assert_suggestion_result("foo \u{FF0C}bar", CommaFixes, "foo, bar")
    }

    #[test]
    fn corrects_comma_between_words_with_space_on_both_sides() {
        assert_suggestion_result("foo , bar", CommaFixes, "foo, bar")
    }

    #[test]
    fn corrects_asian_comma_between_words_with_space_on_both_sides() {
        assert_suggestion_result("foo 、 bar", CommaFixes, "foo, bar")
    }

    #[test]
    fn doesnt_correct_comma_between_non_english_tokens() {
        assert_lint_count(
            "严禁采摘花、 果、叶\u{FF0C}挖掘树根、草药!",
            CommaFixes,
            0,
        );
    }
}