harper_core/linting/
merge_words.rs1use std::sync::Arc;
2
3use itertools::Itertools;
4
5use super::{Lint, LintKind, Linter, Suggestion};
6use crate::spell::{Dictionary, FstDictionary};
7use crate::{CharString, Document, Span};
8
9pub struct MergeWords {
10 dict: Arc<FstDictionary>,
11}
12
13impl MergeWords {
14 pub fn new() -> Self {
15 Self {
16 dict: FstDictionary::curated(),
17 }
18 }
19}
20
21impl Default for MergeWords {
22 fn default() -> Self {
23 Self::new()
24 }
25}
26
27impl Linter for MergeWords {
28 fn lint(&mut self, document: &Document) -> Vec<Lint> {
29 let mut lints = Vec::new();
30
31 let mut merged_word = CharString::new();
32
33 for (a, w, b) in document.tokens().tuple_windows() {
34 if !a.kind.is_word() || !w.kind.is_whitespace() || !b.kind.is_word() {
35 continue;
36 }
37
38 let a_chars = document.get_span_content(&a.span);
39 let b_chars = document.get_span_content(&b.span);
40
41 if (a_chars.len() == 1 && a_chars[0].is_uppercase())
42 || (b_chars.len() == 1 && b_chars[0].is_uppercase())
43 {
44 continue;
45 }
46
47 if matches!(a_chars, ['a']) || matches!(b_chars, ['a']) {
49 continue;
50 }
51
52 merged_word.clear();
53 merged_word.extend_from_slice(a_chars);
54 merged_word.extend_from_slice(b_chars);
55
56 if self.dict.contains_word(&merged_word)
57 && (!self.dict.contains_word(a_chars) || !self.dict.contains_word(b_chars))
58 {
59 lints.push(Lint {
60 span: Span::new(a.span.start, b.span.end),
61 lint_kind: LintKind::WordChoice,
62 suggestions: vec![Suggestion::ReplaceWith(merged_word.to_vec())],
63 message: "It seems these words would go better together.".to_owned(),
64 priority: 63,
65 });
66 }
67
68 merged_word.clear();
69 merged_word.extend_from_slice(a_chars);
70 merged_word.push('\'');
71 merged_word.extend_from_slice(b_chars);
72
73 if self.dict.contains_word(&merged_word)
74 && (!self.dict.contains_word(a_chars) || !self.dict.contains_word(b_chars))
75 {
76 lints.push(Lint {
77 span: Span::new(a.span.start, b.span.end),
78 lint_kind: LintKind::WordChoice,
79 suggestions: vec![Suggestion::ReplaceWith(merged_word.to_vec())],
80 message: "It seems you intended to make this a contraction.".to_owned(),
81 priority: 63,
82 });
83 }
84 }
85
86 lints
87 }
88
89 fn description(&self) -> &str {
90 "Accidentally inserting a space inside a word is common. This rule looks for valid words that are split by whitespace."
91 }
92}
93
#[cfg(test)]
mod tests {
    use super::MergeWords;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    /// A sentence without any split words yields no lints.
    #[test]
    fn clean() {
        let text =
            "When referring to the political party, make sure to treat them as a proper noun.";
        assert_lint_count(text, MergeWords::default(), 0);
    }

    /// "her etofore" is reported exactly once.
    #[test]
    fn heretofore() {
        let text = "This is a her etofore unseen problem.";
        assert_lint_count(text, MergeWords::default(), 1);
    }

    /// "The refore" is reported exactly once.
    #[test]
    fn therefore() {
        let text = "The refore";
        assert_lint_count(text, MergeWords::default(), 1);
    }

    /// A split contraction is repaired by inserting the apostrophe.
    #[test]
    fn that_is_contraction() {
        let linter = MergeWords::default();
        assert_suggestion_result("That s", linter, "That's");
    }

    /// Standalone capital letters must not be merged with their neighbours
    /// (regression cases named after issue 722).
    #[test]
    fn allows_issue_722() {
        let sentences = ["Leaving S and K alone.", "Similarly an S with a line."];
        for sentence in sentences {
            assert_lint_count(sentence, MergeWords::default(), 0);
        }
    }
}