harper_core/linting/
mod.rs

1//! Frameworks and rules that locate errors in text.
2//!
3//! See the [`Linter`] trait and the [documentation for authoring a rule](https://writewithharper.com/docs/contributors/author-a-rule) for more information.
4
5mod a_part;
6mod a_while;
7mod addicting;
8mod adjective_double_degree;
9mod adjective_of_a;
10mod after_later;
11mod all_intents_and_purposes;
12mod allow_to;
13mod am_in_the_morning;
14mod amounts_for;
15mod an_a;
16mod and_in;
17mod and_the_like;
18mod another_thing_coming;
19mod another_think_coming;
20mod ask_no_preposition;
21mod avoid_curses;
22mod back_in_the_day;
23mod be_allowed;
24mod best_of_all_time;
25mod boring_words;
26mod bought;
27mod call_them;
28mod cant;
29mod capitalize_personal_pronouns;
30mod cautionary_tale;
31mod change_tack;
32mod chock_full;
33mod closed_compounds;
34mod comma_fixes;
35mod compound_nouns;
36mod compound_subject_i;
37mod confident;
38mod correct_number_suffix;
39mod criteria_phenomena;
40mod currency_placement;
41mod dashes;
42mod despite_of;
43mod determiner_without_noun;
44mod didnt;
45mod discourse_markers;
46mod dot_initialisms;
47mod double_click;
48mod double_modal;
49mod ellipsis_length;
50mod else_possessive;
51mod everyday;
52mod expand_memory_shorthands;
53mod expand_time_shorthands;
54mod expr_linter;
55mod far_be_it;
56mod feel_fell;
57mod few_units_of_time_ago;
58mod filler_words;
59mod find_fine;
60mod first_aid_kit;
61mod for_noun;
62mod free_predicate;
63mod friend_of_me;
64mod go_so_far_as_to;
65mod have_pronoun;
66mod have_take_a_look;
67mod hedging;
68mod hello_greeting;
69mod hereby;
70mod hop_hope;
71mod hope_youre;
72mod how_to;
73mod hyphenate_number_day;
74mod i_am_agreement;
75mod if_wouldve;
76mod in_on_the_cards;
77mod inflected_verb_after_to;
78mod initialism_linter;
79mod initialisms;
80mod interested_in;
81mod it_is;
82mod it_looks_like_that;
83mod it_would_be;
84mod its_contraction;
85mod its_possessive;
86mod left_right_hand;
87mod less_worse;
88mod let_to_do;
89mod lets_confusion;
90mod likewise;
91mod lint;
92mod lint_group;
93mod lint_kind;
94mod long_sentences;
95mod looking_forward_to;
96mod map_phrase_linter;
97mod map_phrase_set_linter;
98mod mass_plurals;
99mod merge_linters;
100mod merge_words;
101mod missing_preposition;
102mod missing_space;
103mod missing_to;
104mod misspell;
105mod mixed_bag;
106mod modal_of;
107mod modal_seem;
108mod months;
109mod more_better;
110mod most_number;
111mod most_of_the_times;
112mod multiple_sequential_pronouns;
113mod nail_on_the_head;
114mod need_to_noun;
115mod no_french_spaces;
116mod no_match_for;
117mod no_oxford_comma;
118mod nobody;
119mod nominal_wants;
120mod noun_countability;
121mod noun_verb_confusion;
122mod number_suffix_capitalization;
123mod of_course;
124mod on_floor;
125mod once_or_twice;
126mod one_and_the_same;
127mod open_compounds;
128mod open_the_light;
129mod orthographic_consistency;
130mod ought_to_be;
131mod out_of_date;
132mod oxford_comma;
133mod oxymorons;
134mod phrasal_verb_as_compound_noun;
135mod phrase_corrections;
136mod phrase_set_corrections;
137mod pique_interest;
138mod possessive_noun;
139mod possessive_your;
140mod progressive_needs_be;
141mod pronoun_are;
142mod pronoun_contraction;
143mod pronoun_inflection_be;
144mod pronoun_knew;
145mod proper_noun_capitalization_linters;
146mod quantifier_needs_of;
147mod quantifier_numeral_conflict;
148mod quite_quiet;
149mod quote_spacing;
150mod redundant_additive_adverbs;
151mod regionalisms;
152mod repeated_words;
153mod roller_skated;
154mod safe_to_save;
155mod save_to_safe;
156mod semicolon_apostrophe;
157mod sentence_capitalization;
158mod shoot_oneself_in_the_foot;
159mod simple_past_to_past_participle;
160mod since_duration;
161mod single_be;
162mod some_without_article;
163mod something_is;
164mod somewhat_something;
165mod sought_after;
166mod spaces;
167mod spell_check;
168mod spelled_numbers;
169mod split_words;
170mod subject_pronoun;
171mod suggestion;
172mod take_serious;
173mod that_than;
174mod that_which;
175mod the_how_why;
176mod the_my;
177mod then_than;
178mod theres;
179mod theses_these;
180mod thing_think;
181mod though_thought;
182mod throw_away;
183mod throw_rubbish;
184mod to_adverb;
185mod to_two_too;
186mod touristic;
187mod unclosed_quotes;
188mod update_place_names;
189mod use_genitive;
190mod verb_to_adjective;
191mod very_unique;
192mod vice_versa;
193mod was_aloud;
194mod way_too_adjective;
195mod well_educated;
196mod whereas;
197mod widely_accepted;
198mod win_prize;
199mod wordpress_dotcom;
200mod would_never_have;
201
202pub use expr_linter::ExprLinter;
203pub use initialism_linter::InitialismLinter;
204pub use lint::Lint;
205pub use lint_group::{LintGroup, LintGroupConfig};
206pub use lint_kind::LintKind;
207pub use map_phrase_linter::MapPhraseLinter;
208pub use map_phrase_set_linter::MapPhraseSetLinter;
209pub use spell_check::SpellCheck;
210pub use suggestion::Suggestion;
211
212use crate::{Document, LSend, render_markdown};
213
214/// A __stateless__ rule that searches documents for grammatical errors.
215///
216/// Commonly implemented via [`ExprLinter`].
217///
218/// See also: [`LintGroup`].
219pub trait Linter: LSend {
220    /// Analyzes a document and produces zero or more [`Lint`]s.
221    /// We pass `self` mutably for caching purposes.
222    fn lint(&mut self, document: &Document) -> Vec<Lint>;
223    /// A user-facing description of what kinds of grammatical errors this rule looks for.
224    /// It is usually shown in settings menus.
225    fn description(&self) -> &str;
226}
227
228/// A blanket-implemented trait that renders the Markdown description field of a linter to HTML.
229pub trait HtmlDescriptionLinter {
230    fn description_html(&self) -> String;
231}
232
233impl<L: ?Sized> HtmlDescriptionLinter for L
234where
235    L: Linter,
236{
237    fn description_html(&self) -> String {
238        let desc = self.description();
239        render_markdown(desc)
240    }
241}
242
243#[cfg(test)]
244pub mod tests {
245    use crate::{Document, Span, Token, parsers::PlainEnglish};
246    use hashbrown::HashSet;
247
248    /// Extension trait for converting spans of tokens back to their original text
249    pub trait SpanVecExt {
250        fn to_strings(&self, doc: &Document) -> Vec<String>;
251    }
252
253    impl SpanVecExt for Vec<Span<Token>> {
254        fn to_strings(&self, doc: &Document) -> Vec<String> {
255            self.iter()
256                .map(|sp| {
257                    doc.get_tokens()[sp.start..sp.end]
258                        .iter()
259                        .map(|tok| doc.get_span_content_str(&tok.span))
260                        .collect::<String>()
261                })
262                .collect()
263        }
264    }
265
266    use super::Linter;
267    use crate::spell::FstDictionary;
268
269    #[track_caller]
270    pub fn assert_no_lints(text: &str, linter: impl Linter) {
271        assert_lint_count(text, linter, 0);
272    }
273
274    #[track_caller]
275    pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) {
276        let test = Document::new_markdown_default_curated(text);
277        let lints = linter.lint(&test);
278        dbg!(&lints);
279        if lints.len() != count {
280            panic!(
281                "Expected \"{text}\" to create {count} lints, but it created {}.",
282                lints.len()
283            );
284        }
285    }
286
287    /// Assert the total number of suggestions produced by a [`Linter`], spread across all produced
288    /// [`Lint`]s.
289    #[track_caller]
290    pub fn assert_suggestion_count(text: &str, mut linter: impl Linter, count: usize) {
291        let test = Document::new_markdown_default_curated(text);
292        let lints = linter.lint(&test);
293        assert_eq!(
294            lints.iter().map(|l| l.suggestions.len()).sum::<usize>(),
295            count
296        );
297    }
298
299    /// Runs a provided linter on text, applies the first suggestion from each lint
300    /// and asserts whether the result is equal to a given value.
301    #[track_caller]
302    pub fn assert_suggestion_result(text: &str, linter: impl Linter, expected_result: &str) {
303        assert_nth_suggestion_result(text, linter, expected_result, 0);
304    }
305
306    /// Runs a provided linter on text, applies the nth suggestion from each lint
307    /// and asserts whether the result is equal to a given value.
308    ///
309    /// Note that `n` starts at zero.
310    #[track_caller]
311    pub fn assert_nth_suggestion_result(
312        text: &str,
313        mut linter: impl Linter,
314        expected_result: &str,
315        n: usize,
316    ) {
317        let transformed_str = transform_nth_str(text, &mut linter, n);
318
319        if transformed_str.as_str() != expected_result {
320            panic!("Expected \"{expected_result}\"\n But got  \"{transformed_str}\"");
321        }
322
323        // Applying the suggestions should fix all the lints.
324        assert_lint_count(&transformed_str, linter, 0);
325    }
326
327    #[track_caller]
328    pub fn assert_top3_suggestion_result(
329        text: &str,
330        mut linter: impl Linter,
331        expected_result: &str,
332    ) {
333        let zeroth = transform_nth_str(text, &mut linter, 0);
334        let first = transform_nth_str(text, &mut linter, 1);
335        let second = transform_nth_str(text, &mut linter, 2);
336
337        match (
338            zeroth.as_str() == expected_result,
339            first.as_str() == expected_result,
340            second.as_str() == expected_result,
341        ) {
342            (true, false, false) => assert_lint_count(&zeroth, linter, 0),
343            (false, true, false) => assert_lint_count(&first, linter, 0),
344            (false, false, true) => assert_lint_count(&second, linter, 0),
345            (false, false, false) => panic!(
346                "None of the top 3 suggestions produced the expected result:\n\
347                Expected: \"{expected_result}\"\n\
348                Got:\n\
349                [0]: \"{zeroth}\"\n\
350                [1]: \"{first}\"\n\
351                [2]: \"{second}\""
352            ),
353            // I think it's not possible for more than one suggestion to be correct
354            _ => {}
355        }
356    }
357
358    /// Asserts that none of the suggestions from the linter match the given text.
359    #[track_caller]
360    pub fn assert_not_in_suggestion_result(
361        text: &str,
362        mut linter: impl Linter,
363        bad_suggestion: &str,
364    ) {
365        let test = Document::new_markdown_default_curated(text);
366        let lints = linter.lint(&test);
367
368        for (i, lint) in lints.iter().enumerate() {
369            for (j, suggestion) in lint.suggestions.iter().enumerate() {
370                let mut text_chars: Vec<char> = text.chars().collect();
371                suggestion.apply(lint.span, &mut text_chars);
372                let suggestion_text: String = text_chars.into_iter().collect();
373
374                if suggestion_text == bad_suggestion {
375                    panic!(
376                        "Found undesired suggestion at lint[{i}].suggestions[{j}]:\n\
377                        Expected to not find suggestion: \"{bad_suggestion}\"\n\
378                        But found: \"{suggestion_text}\""
379                    );
380                }
381            }
382        }
383    }
384
385    /// Asserts both that the given text matches the expected good suggestions and that none of the
386    /// suggestions are in the bad suggestions list.
387    #[track_caller]
388    pub fn assert_good_and_bad_suggestions(
389        text: &str,
390        mut linter: impl Linter,
391        good: &[&str],
392        bad: &[&str],
393    ) {
394        let test = Document::new_markdown_default_curated(text);
395        let lints = linter.lint(&test);
396
397        let mut unseen_good: HashSet<_> = good.iter().cloned().collect();
398        let mut found_bad = Vec::new();
399        let mut found_good = Vec::new();
400
401        for (i, lint) in lints.into_iter().enumerate() {
402            for (j, suggestion) in lint.suggestions.into_iter().enumerate() {
403                let mut text_chars: Vec<char> = text.chars().collect();
404                suggestion.apply(lint.span, &mut text_chars);
405                let suggestion_text: String = text_chars.into_iter().collect();
406
407                // Check for bad suggestions
408                if bad.contains(&&*suggestion_text) {
409                    found_bad.push((i, j, suggestion_text.clone()));
410                    eprintln!(
411                        "  ❌ Found bad suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
412                    );
413                }
414                // Check for good suggestions
415                else if good.contains(&&*suggestion_text) {
416                    found_good.push((i, j, suggestion_text.clone()));
417                    eprintln!(
418                        "  ✅ Found good suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
419                    );
420                    unseen_good.remove(suggestion_text.as_str());
421                }
422            }
423        }
424
425        // Print summary
426        if !found_bad.is_empty() || !unseen_good.is_empty() {
427            eprintln!("\n=== Test Summary ===");
428
429            // In the summary section, change these loops:
430            if !found_bad.is_empty() {
431                eprintln!("\n❌ Found {} bad suggestions:", found_bad.len());
432                for (i, j, text) in &found_bad {
433                    eprintln!("  - lint[{i}].suggestions[{j}]: \"{text}\"");
434                }
435            }
436
437            // And for the good suggestions:
438            if !unseen_good.is_empty() {
439                eprintln!(
440                    "\n❌ Missing {} expected good suggestions:",
441                    unseen_good.len()
442                );
443                for text in &unseen_good {
444                    eprintln!("  - \"{text}\"");
445                }
446            }
447
448            eprintln!("\n✅ Found {} good suggestions", found_good.len());
449            eprintln!("==================\n");
450
451            if !found_bad.is_empty() || !unseen_good.is_empty() {
452                panic!("Test failed - see error output above");
453            }
454        } else {
455            eprintln!(
456                "\n✅ All {} good suggestions found, no bad suggestions\n",
457                found_good.len()
458            );
459        }
460    }
461
462    fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
463        let mut text_chars: Vec<char> = text.chars().collect();
464
465        let mut iter_count = 0;
466
467        loop {
468            let test = Document::new_from_vec(
469                text_chars.clone().into(),
470                &PlainEnglish,
471                &FstDictionary::curated(),
472            );
473            let lints = linter.lint(&test);
474
475            if let Some(lint) = lints.first() {
476                if let Some(sug) = lint.suggestions.get(n) {
477                    sug.apply(lint.span, &mut text_chars);
478
479                    let transformed_str: String = text_chars.iter().collect();
480                    dbg!(transformed_str);
481                } else {
482                    break;
483                }
484            } else {
485                break;
486            }
487
488            iter_count += 1;
489
490            if iter_count == 100 {
491                break;
492            }
493        }
494
495        eprintln!("Corrected {iter_count} times.");
496
497        text_chars.iter().collect()
498    }
499}
harper_core/linting/mod.rs

harper_core/linting/
mod.rs