harper_core/linting/
merge_words.rs

1use std::sync::Arc;
2
3use itertools::Itertools;
4
5use super::{Lint, LintKind, Linter, Suggestion};
6use crate::spell::{Dictionary, FstDictionary};
7use crate::{CharString, Document, Span};
8
9pub struct MergeWords {
10    dict: Arc<FstDictionary>,
11}
12
13impl MergeWords {
14    pub fn new() -> Self {
15        Self {
16            dict: FstDictionary::curated(),
17        }
18    }
19}
20
21impl Default for MergeWords {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl Linter for MergeWords {
28    fn lint(&mut self, document: &Document) -> Vec<Lint> {
29        let mut lints = Vec::new();
30
31        let mut merged_word = CharString::new();
32
33        for (a, w, b) in document.tokens().tuple_windows() {
34            if !a.kind.is_word() || !w.kind.is_whitespace() || !b.kind.is_word() {
35                continue;
36            }
37
38            let a_chars = document.get_span_content(&a.span);
39            let b_chars = document.get_span_content(&b.span);
40
41            if (a_chars.len() == 1 && a_chars[0].is_uppercase())
42                || (b_chars.len() == 1 && b_chars[0].is_uppercase())
43            {
44                continue;
45            }
46
47            // Not super helpful in this case, so we skip it
48            if matches!(a_chars, ['a']) || matches!(b_chars, ['a']) {
49                continue;
50            }
51
52            merged_word.clear();
53            merged_word.extend_from_slice(a_chars);
54            merged_word.extend_from_slice(b_chars);
55
56            if self.dict.contains_word(&merged_word)
57                && (!self.dict.contains_word(a_chars) || !self.dict.contains_word(b_chars))
58            {
59                lints.push(Lint {
60                    span: Span::new(a.span.start, b.span.end),
61                    lint_kind: LintKind::WordChoice,
62                    suggestions: vec![Suggestion::ReplaceWith(merged_word.to_vec())],
63                    message: "It seems these words would go better together.".to_owned(),
64                    priority: 63,
65                });
66            }
67
68            merged_word.clear();
69            merged_word.extend_from_slice(a_chars);
70            merged_word.push('\'');
71            merged_word.extend_from_slice(b_chars);
72
73            if self.dict.contains_word(&merged_word)
74                && (!self.dict.contains_word(a_chars) || !self.dict.contains_word(b_chars))
75            {
76                lints.push(Lint {
77                    span: Span::new(a.span.start, b.span.end),
78                    lint_kind: LintKind::WordChoice,
79                    suggestions: vec![Suggestion::ReplaceWith(merged_word.to_vec())],
80                    message: "It seems you intended to make this a contraction.".to_owned(),
81                    priority: 63,
82                });
83            }
84        }
85
86        lints
87    }
88
89    fn description(&self) -> &str {
90        "Accidentally inserting a space inside a word is common. This rule looks for valid words that are split by whitespace."
91    }
92}
93
#[cfg(test)]
mod tests {
    use super::MergeWords;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    /// Ordinary, correctly spaced prose must not be flagged.
    #[test]
    fn clean() {
        let text =
            "When referring to the political party, make sure to treat them as a proper noun.";
        assert_lint_count(text, MergeWords::default(), 0);
    }

    /// A split inside a sentence is reported exactly once.
    #[test]
    fn heretofore() {
        let text = "This is a her etofore unseen problem.";
        assert_lint_count(text, MergeWords::default(), 1);
    }

    /// A split is reported even when one half ("The") is itself a word.
    #[test]
    fn therefore() {
        let text = "The refore";
        assert_lint_count(text, MergeWords::default(), 1);
    }

    /// Halves that only form a word with an apostrophe become a contraction.
    #[test]
    fn that_is_contraction() {
        let linter = MergeWords::default();
        assert_suggestion_result("That s", linter, "That's");
    }

    /// Lone uppercase letters next to words are left untouched (issue 722).
    #[test]
    fn allows_issue_722() {
        for text in ["Leaving S and K alone.", "Similarly an S with a line."] {
            assert_lint_count(text, MergeWords::default(), 0);
        }
    }
}