harper_core/linting/
dashes.rs

1use crate::{
2    Token, TokenStringExt,
3    patterns::{EitherPattern, Pattern, SequencePattern},
4};
5
6use super::{Lint, LintKind, PatternLinter, Suggestion};
7
/// The en dash character (U+2013), suggested as the replacement for two consecutive hyphens.
const EN_DASH: char = '–';
/// The em dash character (U+2014), suggested as the replacement for three consecutive hyphens.
const EM_DASH: char = '—';
10
/// Linter that flags short runs of hyphens (`--`, `---`) and suggests the
/// en dash or em dash they stand in for.
pub struct Dashes {
    /// Token pattern used to locate hyphen runs; built in [`Default::default`].
    pattern: Box<dyn Pattern>,
}
14
15impl Default for Dashes {
16    fn default() -> Self {
17        let en_dash = SequencePattern::default().then_hyphen().then_hyphen();
18        let em_dash_or_longer = SequencePattern::default()
19            .then_hyphen()
20            .then_hyphen()
21            .then_one_or_more_hyphens();
22
23        let pattern = EitherPattern::new(vec![Box::new(em_dash_or_longer), Box::new(en_dash)]);
24
25        Self {
26            pattern: Box::new(pattern),
27        }
28    }
29}
30
31impl PatternLinter for Dashes {
32    fn pattern(&self) -> &dyn Pattern {
33        self.pattern.as_ref()
34    }
35
36    fn match_to_lint(&self, matched_tokens: &[Token], _source: &[char]) -> Option<Lint> {
37        let span = matched_tokens.span()?;
38        let lint_kind = LintKind::Formatting;
39
40        match matched_tokens.len() {
41            2 => Some(Lint {
42                span,
43                lint_kind,
44                suggestions: vec![Suggestion::ReplaceWith(vec![EN_DASH])],
45                message: "A sequence of hyphens is not an en dash.".to_owned(),
46                priority: 63,
47            }),
48            3 => Some(Lint {
49                span,
50                lint_kind,
51                suggestions: vec![Suggestion::ReplaceWith(vec![EM_DASH])],
52                message: "A sequence of hyphens is not an em dash.".to_owned(),
53                priority: 63,
54            }),
55            4.. => None, // Ignore longer hyphen sequences.
56            _ => panic!("Received unexpected number of tokens."),
57        }
58    }
59
60    fn description(&self) -> &'static str {
61        "Rather than outright using an em dash or en dash, authors often use a sequence of hyphens, expecting them to be condensed. Use two hyphens to denote an en dash and three to denote an em dash."
62    }
63}
64
#[cfg(test)]
mod tests {
    use super::{Dashes, EM_DASH, EN_DASH};
    use crate::linting::tests::{assert_suggestion_count, assert_suggestion_result};

    /// Two hyphens are replaced with an en dash.
    #[test]
    fn catches_en_dash() {
        let expected = format!("pre{EN_DASH}Industrial Revolution");

        assert_suggestion_result("pre--Industrial Revolution", Dashes::default(), &expected);
    }

    /// Three hyphens are replaced with an em dash.
    #[test]
    fn catches_em_dash() {
        let expected = format!("'There is no box' {EM_DASH} Scott");

        assert_suggestion_result("'There is no box' --- Scott", Dashes::default(), &expected);
    }

    /// A three-hyphen run yields exactly one suggestion.
    #[test]
    fn no_overlaps() {
        let linter = Dashes::default();

        assert_suggestion_count("'There is no box' --- Scott", linter, 1);
    }

    /// A run of six hyphens yields no suggestions.
    #[test]
    fn no_lint_for_long_hyphen_sequences() {
        let linter = Dashes::default();

        assert_suggestion_count("'There is no box' ------ Scott", linter, 0);
    }
}
99}