harper_core/linting/
mod.rs

1//! Frameworks and rules that locate errors in text.
2//!
3//! See the [`Linter`] trait and the [documentation for authoring a rule](https://writewithharper.com/docs/contributors/author-a-rule) for more information.
4
5mod a_part;
6mod a_while;
7mod addicting;
8mod adjective_double_degree;
9mod adjective_of_a;
10mod after_later;
11mod all_intents_and_purposes;
12mod allow_to;
13mod am_in_the_morning;
14mod amounts_for;
15mod an_a;
16mod and_in;
17mod and_the_like;
18mod another_thing_coming;
19mod another_think_coming;
20mod ask_no_preposition;
21mod avoid_curses;
22mod back_in_the_day;
23mod be_allowed;
24mod best_of_all_time;
25mod boring_words;
26mod bought;
27mod call_them;
28mod cant;
29mod capitalize_personal_pronouns;
30mod cautionary_tale;
31mod change_tack;
32mod chock_full;
33mod closed_compounds;
34mod comma_fixes;
35mod compound_nouns;
36mod compound_subject_i;
37mod confident;
38mod correct_number_suffix;
39mod criteria_phenomena;
40mod currency_placement;
41mod dashes;
42mod despite_of;
43mod determiner_without_noun;
44mod didnt;
45mod discourse_markers;
46mod dot_initialisms;
47mod double_click;
48mod double_modal;
49mod ellipsis_length;
50mod else_possessive;
51mod everyday;
52mod expand_memory_shorthands;
53mod expand_time_shorthands;
54mod expr_linter;
55mod far_be_it;
56mod feel_fell;
57mod few_units_of_time_ago;
58mod filler_words;
59mod find_fine;
60mod first_aid_kit;
61mod for_noun;
62mod free_predicate;
63mod friend_of_me;
64mod go_so_far_as_to;
65mod have_pronoun;
66mod have_take_a_look;
67mod hedging;
68mod hello_greeting;
69mod hereby;
70mod hop_hope;
71mod hope_youre;
72mod how_to;
73mod hyphenate_number_day;
74mod i_am_agreement;
75mod if_wouldve;
76mod in_on_the_cards;
77mod inflected_verb_after_to;
78mod initialism_linter;
79mod initialisms;
80mod interested_in;
81mod it_is;
82mod it_looks_like_that;
83mod it_would_be;
84mod its_contraction;
85mod its_possessive;
86mod left_right_hand;
87mod less_worse;
88mod let_to_do;
89mod lets_confusion;
90mod likewise;
91mod lint;
92mod lint_group;
93mod lint_kind;
94mod long_sentences;
95mod looking_forward_to;
96mod map_phrase_linter;
97mod map_phrase_set_linter;
98mod mass_plurals;
99mod merge_linters;
100mod merge_words;
101mod missing_preposition;
102mod missing_space;
103mod missing_to;
104mod misspell;
105mod mixed_bag;
106mod modal_of;
107mod modal_seem;
108mod months;
109mod more_better;
110mod most_number;
111mod most_of_the_times;
112mod multiple_sequential_pronouns;
113mod nail_on_the_head;
114mod need_to_noun;
115mod no_french_spaces;
116mod no_match_for;
117mod no_oxford_comma;
118mod nobody;
119mod nominal_wants;
120mod noun_countability;
121mod noun_verb_confusion;
122mod number_suffix_capitalization;
123mod of_course;
124mod on_floor;
125mod once_or_twice;
126mod one_and_the_same;
127mod open_compounds;
128mod open_the_light;
129mod orthographic_consistency;
130mod ought_to_be;
131mod out_of_date;
132mod oxford_comma;
133mod oxymorons;
134mod phrasal_verb_as_compound_noun;
135mod phrase_corrections;
136mod phrase_set_corrections;
137mod pique_interest;
138mod possessive_noun;
139mod possessive_your;
140mod progressive_needs_be;
141mod pronoun_are;
142mod pronoun_contraction;
143mod pronoun_inflection_be;
144mod pronoun_knew;
145mod proper_noun_capitalization_linters;
146mod quantifier_needs_of;
147mod quantifier_numeral_conflict;
148mod quite_quiet;
149mod quote_spacing;
150mod redundant_additive_adverbs;
151mod regionalisms;
152mod repeated_words;
153mod roller_skated;
154mod safe_to_save;
155mod save_to_safe;
156mod semicolon_apostrophe;
157mod sentence_capitalization;
158mod shoot_oneself_in_the_foot;
159mod simple_past_to_past_participle;
160mod since_duration;
161mod single_be;
162mod some_without_article;
163mod something_is;
164mod somewhat_something;
165mod sought_after;
166mod spaces;
167mod spell_check;
168mod spelled_numbers;
169mod split_words;
170mod subject_pronoun;
171mod suggestion;
172mod take_serious;
173mod that_than;
174mod that_which;
175mod the_how_why;
176mod the_my;
177mod then_than;
178mod theres;
179mod theses_these;
180mod thing_think;
181mod though_thought;
182mod throw_away;
183mod throw_rubbish;
184mod to_adverb;
185mod to_two_too;
186mod touristic;
187mod unclosed_quotes;
188mod update_place_names;
189mod use_genitive;
190mod verb_to_adjective;
191mod very_unique;
192mod vice_versa;
193mod was_aloud;
194mod way_too_adjective;
195mod well_educated;
196mod whereas;
197mod widely_accepted;
198mod win_prize;
199mod wish_could;
200mod wordpress_dotcom;
201mod would_never_have;
202
203pub use expr_linter::ExprLinter;
204pub use initialism_linter::InitialismLinter;
205pub use lint::Lint;
206pub use lint_group::{LintGroup, LintGroupConfig};
207pub use lint_kind::LintKind;
208pub use map_phrase_linter::MapPhraseLinter;
209pub use map_phrase_set_linter::MapPhraseSetLinter;
210pub use spell_check::SpellCheck;
211pub use suggestion::Suggestion;
212
213use crate::{Document, LSend, render_markdown};
214
/// A __stateless__ rule that searches documents for grammatical errors.
///
/// Commonly implemented via [`ExprLinter`].
///
/// See also: [`LintGroup`].
pub trait Linter: LSend {
    /// Analyzes a document and produces zero or more [`Lint`]s.
    ///
    /// `self` is taken mutably so that implementations may keep caches between calls.
    fn lint(&mut self, document: &Document) -> Vec<Lint>;
    /// A user-facing description of what kinds of grammatical errors this rule looks for.
    /// It is usually shown in settings menus.
    ///
    /// The text is treated as Markdown by [`HtmlDescriptionLinter::description_html`].
    fn description(&self) -> &str;
}
228
/// A blanket-implemented trait that renders the Markdown description field of a linter to HTML.
pub trait HtmlDescriptionLinter {
    /// Returns the linter's description after it has been passed through the Markdown renderer.
    fn description_html(&self) -> String;
}
233
234impl<L: ?Sized> HtmlDescriptionLinter for L
235where
236    L: Linter,
237{
238    fn description_html(&self) -> String {
239        let desc = self.description();
240        render_markdown(desc)
241    }
242}
243
244#[cfg(test)]
245pub mod tests {
246    use crate::{Document, Span, Token, parsers::PlainEnglish};
247    use hashbrown::HashSet;
248
249    /// Extension trait for converting spans of tokens back to their original text
250    pub trait SpanVecExt {
251        fn to_strings(&self, doc: &Document) -> Vec<String>;
252    }
253
254    impl SpanVecExt for Vec<Span<Token>> {
255        fn to_strings(&self, doc: &Document) -> Vec<String> {
256            self.iter()
257                .map(|sp| {
258                    doc.get_tokens()[sp.start..sp.end]
259                        .iter()
260                        .map(|tok| doc.get_span_content_str(&tok.span))
261                        .collect::<String>()
262                })
263                .collect()
264        }
265    }
266
267    use super::Linter;
268    use crate::spell::FstDictionary;
269
    /// Asserts that `linter` produces no lints at all for `text`.
    ///
    /// Shorthand for [`assert_lint_count`] with an expected count of zero.
    #[track_caller]
    pub fn assert_no_lints(text: &str, linter: impl Linter) {
        assert_lint_count(text, linter, 0);
    }
274
275    #[track_caller]
276    pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) {
277        let test = Document::new_markdown_default_curated(text);
278        let lints = linter.lint(&test);
279        dbg!(&lints);
280        if lints.len() != count {
281            panic!(
282                "Expected \"{text}\" to create {count} lints, but it created {}.",
283                lints.len()
284            );
285        }
286    }
287
288    /// Assert the total number of suggestions produced by a [`Linter`], spread across all produced
289    /// [`Lint`]s.
290    #[track_caller]
291    pub fn assert_suggestion_count(text: &str, mut linter: impl Linter, count: usize) {
292        let test = Document::new_markdown_default_curated(text);
293        let lints = linter.lint(&test);
294        assert_eq!(
295            lints.iter().map(|l| l.suggestions.len()).sum::<usize>(),
296            count
297        );
298    }
299
    /// Runs a provided linter on text, applies the first suggestion from each lint
    /// and asserts whether the result is equal to a given value.
    ///
    /// Equivalent to [`assert_nth_suggestion_result`] with `n` set to zero.
    #[track_caller]
    pub fn assert_suggestion_result(text: &str, linter: impl Linter, expected_result: &str) {
        assert_nth_suggestion_result(text, linter, expected_result, 0);
    }
306
307    /// Runs a provided linter on text, applies the nth suggestion from each lint
308    /// and asserts whether the result is equal to a given value.
309    ///
310    /// Note that `n` starts at zero.
311    #[track_caller]
312    pub fn assert_nth_suggestion_result(
313        text: &str,
314        mut linter: impl Linter,
315        expected_result: &str,
316        n: usize,
317    ) {
318        let transformed_str = transform_nth_str(text, &mut linter, n);
319
320        if transformed_str.as_str() != expected_result {
321            panic!("Expected \"{expected_result}\"\n But got  \"{transformed_str}\"");
322        }
323
324        // Applying the suggestions should fix all the lints.
325        assert_lint_count(&transformed_str, linter, 0);
326    }
327
328    #[track_caller]
329    pub fn assert_top3_suggestion_result(
330        text: &str,
331        mut linter: impl Linter,
332        expected_result: &str,
333    ) {
334        let zeroth = transform_nth_str(text, &mut linter, 0);
335        let first = transform_nth_str(text, &mut linter, 1);
336        let second = transform_nth_str(text, &mut linter, 2);
337
338        match (
339            zeroth.as_str() == expected_result,
340            first.as_str() == expected_result,
341            second.as_str() == expected_result,
342        ) {
343            (true, false, false) => assert_lint_count(&zeroth, linter, 0),
344            (false, true, false) => assert_lint_count(&first, linter, 0),
345            (false, false, true) => assert_lint_count(&second, linter, 0),
346            (false, false, false) => panic!(
347                "None of the top 3 suggestions produced the expected result:\n\
348                Expected: \"{expected_result}\"\n\
349                Got:\n\
350                [0]: \"{zeroth}\"\n\
351                [1]: \"{first}\"\n\
352                [2]: \"{second}\""
353            ),
354            // I think it's not possible for more than one suggestion to be correct
355            _ => {}
356        }
357    }
358
359    /// Asserts that none of the suggestions from the linter match the given text.
360    #[track_caller]
361    pub fn assert_not_in_suggestion_result(
362        text: &str,
363        mut linter: impl Linter,
364        bad_suggestion: &str,
365    ) {
366        let test = Document::new_markdown_default_curated(text);
367        let lints = linter.lint(&test);
368
369        for (i, lint) in lints.iter().enumerate() {
370            for (j, suggestion) in lint.suggestions.iter().enumerate() {
371                let mut text_chars: Vec<char> = text.chars().collect();
372                suggestion.apply(lint.span, &mut text_chars);
373                let suggestion_text: String = text_chars.into_iter().collect();
374
375                if suggestion_text == bad_suggestion {
376                    panic!(
377                        "Found undesired suggestion at lint[{i}].suggestions[{j}]:\n\
378                        Expected to not find suggestion: \"{bad_suggestion}\"\n\
379                        But found: \"{suggestion_text}\""
380                    );
381                }
382            }
383        }
384    }
385
386    /// Asserts both that the given text matches the expected good suggestions and that none of the
387    /// suggestions are in the bad suggestions list.
388    #[track_caller]
389    pub fn assert_good_and_bad_suggestions(
390        text: &str,
391        mut linter: impl Linter,
392        good: &[&str],
393        bad: &[&str],
394    ) {
395        let test = Document::new_markdown_default_curated(text);
396        let lints = linter.lint(&test);
397
398        let mut unseen_good: HashSet<_> = good.iter().cloned().collect();
399        let mut found_bad = Vec::new();
400        let mut found_good = Vec::new();
401
402        for (i, lint) in lints.into_iter().enumerate() {
403            for (j, suggestion) in lint.suggestions.into_iter().enumerate() {
404                let mut text_chars: Vec<char> = text.chars().collect();
405                suggestion.apply(lint.span, &mut text_chars);
406                let suggestion_text: String = text_chars.into_iter().collect();
407
408                // Check for bad suggestions
409                if bad.contains(&&*suggestion_text) {
410                    found_bad.push((i, j, suggestion_text.clone()));
411                    eprintln!(
412                        "  ❌ Found bad suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
413                    );
414                }
415                // Check for good suggestions
416                else if good.contains(&&*suggestion_text) {
417                    found_good.push((i, j, suggestion_text.clone()));
418                    eprintln!(
419                        "  ✅ Found good suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
420                    );
421                    unseen_good.remove(suggestion_text.as_str());
422                }
423            }
424        }
425
426        // Print summary
427        if !found_bad.is_empty() || !unseen_good.is_empty() {
428            eprintln!("\n=== Test Summary ===");
429
430            // In the summary section, change these loops:
431            if !found_bad.is_empty() {
432                eprintln!("\n❌ Found {} bad suggestions:", found_bad.len());
433                for (i, j, text) in &found_bad {
434                    eprintln!("  - lint[{i}].suggestions[{j}]: \"{text}\"");
435                }
436            }
437
438            // And for the good suggestions:
439            if !unseen_good.is_empty() {
440                eprintln!(
441                    "\n❌ Missing {} expected good suggestions:",
442                    unseen_good.len()
443                );
444                for text in &unseen_good {
445                    eprintln!("  - \"{text}\"");
446                }
447            }
448
449            eprintln!("\n✅ Found {} good suggestions", found_good.len());
450            eprintln!("==================\n");
451
452            if !found_bad.is_empty() || !unseen_good.is_empty() {
453                panic!("Test failed - see error output above");
454            }
455        } else {
456            eprintln!(
457                "\n✅ All {} good suggestions found, no bad suggestions\n",
458                found_good.len()
459            );
460        }
461    }
462
463    fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
464        let mut text_chars: Vec<char> = text.chars().collect();
465
466        let mut iter_count = 0;
467
468        loop {
469            let test = Document::new_from_vec(
470                text_chars.clone().into(),
471                &PlainEnglish,
472                &FstDictionary::curated(),
473            );
474            let lints = linter.lint(&test);
475
476            if let Some(lint) = lints.first() {
477                if let Some(sug) = lint.suggestions.get(n) {
478                    sug.apply(lint.span, &mut text_chars);
479
480                    let transformed_str: String = text_chars.iter().collect();
481                    dbg!(transformed_str);
482                } else {
483                    break;
484                }
485            } else {
486                break;
487            }
488
489            iter_count += 1;
490
491            if iter_count == 100 {
492                break;
493            }
494        }
495
496        eprintln!("Corrected {iter_count} times.");
497
498        text_chars.iter().collect()
499    }
500}