harper_core/linting/
mod.rs

1//! Frameworks and rules that locate errors in text.
2//!
3//! See the [`Linter`] trait and the [documentation for authoring a rule](https://writewithharper.com/docs/contributors/author-a-rule) for more information.
4
5mod a_part;
6mod addicting;
7mod adjective_double_degree;
8mod adjective_of_a;
9mod all_intents_and_purposes;
10mod am_in_the_morning;
11mod amounts_for;
12mod an_a;
13mod another_thing_coming;
14mod another_think_coming;
15mod ask_no_preposition;
16mod avoid_curses;
17mod back_in_the_day;
18mod best_of_all_time;
19mod boring_words;
20mod cant;
21mod capitalize_personal_pronouns;
22mod chock_full;
23mod closed_compounds;
24mod comma_fixes;
25mod compound_nouns;
26mod confident;
27mod correct_number_suffix;
28mod criteria_phenomena;
29mod currency_placement;
30mod dashes;
31mod despite_of;
32mod discourse_markers;
33mod dot_initialisms;
34mod double_modal;
35mod ellipsis_length;
36mod else_possessive;
37mod everyday;
38mod expand_memory_shorthands;
39mod expand_time_shorthands;
40mod expr_linter;
41mod feel_fell;
42mod few_units_of_time_ago;
43mod filler_words;
44mod first_aid_kit;
45mod for_noun;
46mod friend_of_me;
47mod have_pronoun;
48mod have_take_a_look;
49mod hedging;
50mod hereby;
51mod hop_hope;
52mod hope_youre;
53mod how_to;
54mod hyphenate_number_day;
55mod i_am_agreement;
56mod in_on_the_cards;
57mod inflected_verb_after_to;
58mod initialism_linter;
59mod initialisms;
60mod interested_in;
61mod it_is;
62mod it_looks_like_that;
63mod it_would_be;
64mod its_contraction;
65mod its_possessive;
66mod left_right_hand;
67mod less_worse;
68mod lets_confusion;
69mod likewise;
70mod lint;
71mod lint_group;
72mod lint_kind;
73mod long_sentences;
74mod looking_forward_to;
75mod map_phrase_linter;
76mod map_phrase_set_linter;
77mod mass_plurals;
78mod merge_linters;
79mod merge_words;
80mod missing_preposition;
81mod missing_to;
82mod mixed_bag;
83mod modal_of;
84mod months;
85mod most_number;
86mod multiple_sequential_pronouns;
87mod nail_on_the_head;
88mod no_french_spaces;
89mod no_match_for;
90mod no_oxford_comma;
91mod nobody;
92mod nominal_wants;
93mod noun_countability;
94mod noun_verb_confusion;
95mod number_suffix_capitalization;
96mod of_course;
97mod on_floor;
98mod one_and_the_same;
99mod open_compounds;
100mod open_the_light;
101mod ought_to_be;
102mod out_of_date;
103mod oxford_comma;
104mod oxymorons;
105mod phrasal_verb_as_compound_noun;
106mod phrase_corrections;
107mod phrase_set_corrections;
108mod pique_interest;
109mod possessive_noun;
110mod possessive_your;
111mod progressive_needs_be;
112mod pronoun_contraction;
113mod pronoun_inflection_be;
114mod pronoun_knew;
115mod proper_noun_capitalization_linters;
116mod quantifier_needs_of;
117mod quite_quiet;
118mod quote_spacing;
119mod redundant_additive_adverbs;
120mod regionalisms;
121mod repeated_words;
122mod save_to_safe;
123mod semicolon_apostrophe;
124mod sentence_capitalization;
125mod shoot_oneself_in_the_foot;
126mod simple_past_to_past_participle;
127mod since_duration;
128mod somewhat_something;
129mod sought_after;
130mod spaces;
131mod spell_check;
132mod spelled_numbers;
133mod suggestion;
134mod take_serious;
135mod that_than;
136mod that_which;
137mod the_how_why;
138mod the_my;
139mod then_than;
140mod thing_think;
141mod though_thought;
142mod throw_rubbish;
143mod to_two_too;
144mod touristic;
145mod unclosed_quotes;
146mod update_place_names;
147mod use_genitive;
148mod very_unique;
149mod was_aloud;
150mod way_too_adjective;
151mod whereas;
152mod widely_accepted;
153mod win_prize;
154mod wordpress_dotcom;
155mod would_never_have;
156
157pub use a_part::APart;
158pub use addicting::Addicting;
159pub use adjective_double_degree::AdjectiveDoubleDegree;
160pub use adjective_of_a::AdjectiveOfA;
161pub use all_intents_and_purposes::AllIntentsAndPurposes;
162pub use am_in_the_morning::AmInTheMorning;
163pub use amounts_for::AmountsFor;
164pub use an_a::AnA;
165pub use another_thing_coming::AnotherThingComing;
166pub use another_think_coming::AnotherThinkComing;
167pub use ask_no_preposition::AskNoPreposition;
168pub use avoid_curses::AvoidCurses;
169pub use back_in_the_day::BackInTheDay;
170pub use best_of_all_time::BestOfAllTime;
171pub use boring_words::BoringWords;
172pub use cant::Cant;
173pub use capitalize_personal_pronouns::CapitalizePersonalPronouns;
174pub use chock_full::ChockFull;
175pub use comma_fixes::CommaFixes;
176pub use compound_nouns::CompoundNouns;
177pub use confident::Confident;
178pub use correct_number_suffix::CorrectNumberSuffix;
179pub use criteria_phenomena::CriteriaPhenomena;
180pub use currency_placement::CurrencyPlacement;
181pub use dashes::Dashes;
182pub use despite_of::DespiteOf;
183pub use discourse_markers::DiscourseMarkers;
184pub use dot_initialisms::DotInitialisms;
185pub use double_modal::DoubleModal;
186pub use ellipsis_length::EllipsisLength;
187pub use everyday::Everyday;
188pub use expand_memory_shorthands::ExpandMemoryShorthands;
189pub use expand_time_shorthands::ExpandTimeShorthands;
190pub use expr_linter::ExprLinter;
191pub use feel_fell::FeelFell;
192pub use few_units_of_time_ago::FewUnitsOfTimeAgo;
193pub use filler_words::FillerWords;
194pub use for_noun::ForNoun;
195pub use friend_of_me::FriendOfMe;
196pub use have_pronoun::HavePronoun;
197pub use have_take_a_look::HaveTakeALook;
198pub use hedging::Hedging;
199pub use hereby::Hereby;
200pub use hop_hope::HopHope;
201pub use how_to::HowTo;
202pub use hyphenate_number_day::HyphenateNumberDay;
203pub use i_am_agreement::IAmAgreement;
204pub use in_on_the_cards::InOnTheCards;
205pub use inflected_verb_after_to::InflectedVerbAfterTo;
206pub use initialism_linter::InitialismLinter;
207pub use interested_in::InterestedIn;
208pub use it_looks_like_that::ItLooksLikeThat;
209pub use its_contraction::ItsContraction;
210pub use its_possessive::ItsPossessive;
211pub use left_right_hand::LeftRightHand;
212pub use less_worse::LessWorse;
213pub use lets_confusion::LetsConfusion;
214pub use likewise::Likewise;
215pub use lint::Lint;
216pub use lint_group::{LintGroup, LintGroupConfig};
217pub use lint_kind::LintKind;
218pub use long_sentences::LongSentences;
219pub use looking_forward_to::LookingForwardTo;
220pub use map_phrase_linter::MapPhraseLinter;
221pub use map_phrase_set_linter::MapPhraseSetLinter;
222pub use mass_plurals::MassPlurals;
223pub use merge_words::MergeWords;
224pub use missing_preposition::MissingPreposition;
225pub use missing_to::MissingTo;
226pub use mixed_bag::MixedBag;
227pub use modal_of::ModalOf;
228pub use months::Months;
229pub use most_number::MostNumber;
230pub use multiple_sequential_pronouns::MultipleSequentialPronouns;
231pub use nail_on_the_head::NailOnTheHead;
232pub use no_french_spaces::NoFrenchSpaces;
233pub use no_match_for::NoMatchFor;
234pub use no_oxford_comma::NoOxfordComma;
235pub use nobody::Nobody;
236pub use noun_countability::NounCountability;
237pub use noun_verb_confusion::NounVerbConfusion;
238pub use number_suffix_capitalization::NumberSuffixCapitalization;
239pub use of_course::OfCourse;
240pub use on_floor::OnFloor;
241pub use one_and_the_same::OneAndTheSame;
242pub use open_the_light::OpenTheLight;
243pub use ought_to_be::OughtToBe;
244pub use out_of_date::OutOfDate;
245pub use oxford_comma::OxfordComma;
246pub use oxymorons::Oxymorons;
247pub use phrasal_verb_as_compound_noun::PhrasalVerbAsCompoundNoun;
248pub use pique_interest::PiqueInterest;
249pub use possessive_noun::PossessiveNoun;
250pub use possessive_your::PossessiveYour;
251pub use progressive_needs_be::ProgressiveNeedsBe;
252pub use pronoun_contraction::PronounContraction;
253pub use pronoun_inflection_be::PronounInflectionBe;
254pub use quantifier_needs_of::QuantifierNeedsOf;
255pub use quite_quiet::QuiteQuiet;
256pub use quote_spacing::QuoteSpacing;
257pub use redundant_additive_adverbs::RedundantAdditiveAdverbs;
258pub use regionalisms::Regionalisms;
259pub use repeated_words::RepeatedWords;
260pub use save_to_safe::SaveToSafe;
261pub use semicolon_apostrophe::SemicolonApostrophe;
262pub use sentence_capitalization::SentenceCapitalization;
263pub use shoot_oneself_in_the_foot::ShootOneselfInTheFoot;
264pub use simple_past_to_past_participle::SimplePastToPastParticiple;
265pub use since_duration::SinceDuration;
266pub use somewhat_something::SomewhatSomething;
267pub use sought_after::SoughtAfter;
268pub use spaces::Spaces;
269pub use spell_check::SpellCheck;
270pub use spelled_numbers::SpelledNumbers;
271pub use suggestion::Suggestion;
272pub use take_serious::TakeSerious;
273pub use that_than::ThatThan;
274pub use that_which::ThatWhich;
275pub use the_how_why::TheHowWhy;
276pub use the_my::TheMy;
277pub use then_than::ThenThan;
278pub use thing_think::ThingThink;
279pub use though_thought::ThoughThought;
280pub use throw_rubbish::ThrowRubbish;
281pub use to_two_too::ToTwoToo;
282pub use touristic::Touristic;
283pub use unclosed_quotes::UnclosedQuotes;
284pub use update_place_names::UpdatePlaceNames;
285pub use use_genitive::UseGenitive;
286pub use very_unique::VeryUnique;
287pub use was_aloud::WasAloud;
288pub use way_too_adjective::WayTooAdjective;
289pub use whereas::Whereas;
290pub use widely_accepted::WidelyAccepted;
291pub use win_prize::WinPrize;
292pub use wordpress_dotcom::WordPressDotcom;
293pub use would_never_have::WouldNeverHave;
294
295use crate::{Document, LSend, render_markdown};
296
297/// A __stateless__ rule that searches documents for grammatical errors.
298///
299/// Commonly implemented via [`ExprLinter`].
300///
301/// See also: [`LintGroup`].
302pub trait Linter: LSend {
303    /// Analyzes a document and produces zero or more [`Lint`]s.
304    /// We pass `self` mutably for caching purposes.
305    fn lint(&mut self, document: &Document) -> Vec<Lint>;
306    /// A user-facing description of what kinds of grammatical errors this rule looks for.
307    /// It is usually shown in settings menus.
308    fn description(&self) -> &str;
309}
310
/// Renders the Markdown description of a linter to HTML.
///
/// This trait is blanket-implemented for every [`Linter`], so it does not need to be
/// implemented by hand.
pub trait HtmlDescriptionLinter {
    /// Returns the linter's description after it has been rendered from Markdown.
    fn description_html(&self) -> String;
}
315
316impl<L: ?Sized> HtmlDescriptionLinter for L
317where
318    L: Linter,
319{
320    fn description_html(&self) -> String {
321        let desc = self.description();
322        render_markdown(desc)
323    }
324}
325
326#[cfg(test)]
327pub mod tests {
328    use crate::{Document, Span, Token, parsers::PlainEnglish};
329    use hashbrown::HashSet;
330
331    /// Extension trait for converting spans of tokens back to their original text
332    pub trait SpanVecExt {
333        fn to_strings(&self, doc: &Document) -> Vec<String>;
334    }
335
336    impl SpanVecExt for Vec<Span<Token>> {
337        fn to_strings(&self, doc: &Document) -> Vec<String> {
338            self.iter()
339                .map(|sp| {
340                    doc.get_tokens()[sp.start..sp.end]
341                        .iter()
342                        .map(|tok| doc.get_span_content_str(&tok.span))
343                        .collect::<String>()
344                })
345                .collect()
346        }
347    }
348
349    use super::Linter;
350    use crate::spell::FstDictionary;
351
352    #[track_caller]
353    pub fn assert_no_lints(text: &str, linter: impl Linter) {
354        assert_lint_count(text, linter, 0);
355    }
356
357    #[track_caller]
358    pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) {
359        let test = Document::new_markdown_default_curated(text);
360        let lints = linter.lint(&test);
361        dbg!(&lints);
362        if lints.len() != count {
363            panic!(
364                "Expected \"{text}\" to create {count} lints, but it created {}.",
365                lints.len()
366            );
367        }
368    }
369
370    /// Assert the total number of suggestions produced by a [`Linter`], spread across all produced
371    /// [`Lint`]s.
372    #[track_caller]
373    pub fn assert_suggestion_count(text: &str, mut linter: impl Linter, count: usize) {
374        let test = Document::new_markdown_default_curated(text);
375        let lints = linter.lint(&test);
376        assert_eq!(
377            lints.iter().map(|l| l.suggestions.len()).sum::<usize>(),
378            count
379        );
380    }
381
382    /// Runs a provided linter on text, applies the first suggestion from each lint
383    /// and asserts whether the result is equal to a given value.
384    #[track_caller]
385    pub fn assert_suggestion_result(text: &str, linter: impl Linter, expected_result: &str) {
386        assert_nth_suggestion_result(text, linter, expected_result, 0);
387    }
388
389    /// Runs a provided linter on text, applies the nth suggestion from each lint
390    /// and asserts whether the result is equal to a given value.
391    ///
392    /// Note that `n` starts at zero.
393    #[track_caller]
394    pub fn assert_nth_suggestion_result(
395        text: &str,
396        mut linter: impl Linter,
397        expected_result: &str,
398        n: usize,
399    ) {
400        let transformed_str = transform_nth_str(text, &mut linter, n);
401
402        if transformed_str.as_str() != expected_result {
403            panic!(
404                "Expected \"{transformed_str}\" to be \"{expected_result}\" after applying the computed suggestions."
405            );
406        }
407
408        // Applying the suggestions should fix all the lints.
409        assert_lint_count(&transformed_str, linter, 0);
410    }
411
412    #[track_caller]
413    pub fn assert_top3_suggestion_result(
414        text: &str,
415        mut linter: impl Linter,
416        expected_result: &str,
417    ) {
418        let zeroth = transform_nth_str(text, &mut linter, 0);
419        let first = transform_nth_str(text, &mut linter, 1);
420        let second = transform_nth_str(text, &mut linter, 2);
421
422        match (
423            zeroth.as_str() == expected_result,
424            first.as_str() == expected_result,
425            second.as_str() == expected_result,
426        ) {
427            (true, false, false) => assert_lint_count(&zeroth, linter, 0),
428            (false, true, false) => assert_lint_count(&first, linter, 0),
429            (false, false, true) => assert_lint_count(&second, linter, 0),
430            (false, false, false) => panic!(
431                "None of the top 3 suggestions produced the expected result:\n\
432                Expected: \"{expected_result}\"\n\
433                Got:\n\
434                [0]: \"{zeroth}\"\n\
435                [1]: \"{first}\"\n\
436                [2]: \"{second}\""
437            ),
438            // I think it's not possible for more than one suggestion to be correct
439            _ => {}
440        }
441    }
442
443    /// Asserts that none of the suggestions from the linter match the given text.
444    #[track_caller]
445    pub fn assert_not_in_suggestion_result(
446        text: &str,
447        mut linter: impl Linter,
448        bad_suggestion: &str,
449    ) {
450        let test = Document::new_markdown_default_curated(text);
451        let lints = linter.lint(&test);
452
453        for (i, lint) in lints.iter().enumerate() {
454            for (j, suggestion) in lint.suggestions.iter().enumerate() {
455                let mut text_chars: Vec<char> = text.chars().collect();
456                suggestion.apply(lint.span, &mut text_chars);
457                let suggestion_text: String = text_chars.into_iter().collect();
458
459                if suggestion_text == bad_suggestion {
460                    panic!(
461                        "Found undesired suggestion at lint[{i}].suggestions[{j}]:\n\
462                        Expected to not find suggestion: \"{bad_suggestion}\"\n\
463                        But found: \"{suggestion_text}\""
464                    );
465                }
466            }
467        }
468    }
469
470    /// Asserts both that the given text matches the expected good suggestions and that none of the
471    /// suggestions are in the bad suggestions list.
472    #[track_caller]
473    pub fn assert_good_and_bad_suggestions(
474        text: &str,
475        mut linter: impl Linter,
476        good: &[&str],
477        bad: &[&str],
478    ) {
479        let test = Document::new_markdown_default_curated(text);
480        let lints = linter.lint(&test);
481
482        let mut unseen_good: HashSet<_> = good.iter().cloned().collect();
483        let mut found_bad = Vec::new();
484        let mut found_good = Vec::new();
485
486        for (i, lint) in lints.into_iter().enumerate() {
487            for (j, suggestion) in lint.suggestions.into_iter().enumerate() {
488                let mut text_chars: Vec<char> = text.chars().collect();
489                suggestion.apply(lint.span, &mut text_chars);
490                let suggestion_text: String = text_chars.into_iter().collect();
491
492                // Check for bad suggestions
493                if bad.contains(&&*suggestion_text) {
494                    found_bad.push((i, j, suggestion_text.clone()));
495                    eprintln!(
496                        "  ❌ Found bad suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
497                    );
498                }
499                // Check for good suggestions
500                else if good.contains(&&*suggestion_text) {
501                    found_good.push((i, j, suggestion_text.clone()));
502                    eprintln!(
503                        "  ✅ Found good suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
504                    );
505                    unseen_good.remove(suggestion_text.as_str());
506                }
507            }
508        }
509
510        // Print summary
511        if !found_bad.is_empty() || !unseen_good.is_empty() {
512            eprintln!("\n=== Test Summary ===");
513
514            // In the summary section, change these loops:
515            if !found_bad.is_empty() {
516                eprintln!("\n❌ Found {} bad suggestions:", found_bad.len());
517                for (i, j, text) in &found_bad {
518                    eprintln!("  - lint[{i}].suggestions[{j}]: \"{text}\"");
519                }
520            }
521
522            // And for the good suggestions:
523            if !unseen_good.is_empty() {
524                eprintln!(
525                    "\n❌ Missing {} expected good suggestions:",
526                    unseen_good.len()
527                );
528                for text in &unseen_good {
529                    eprintln!("  - \"{text}\"");
530                }
531            }
532
533            eprintln!("\n✅ Found {} good suggestions", found_good.len());
534            eprintln!("==================\n");
535
536            if !found_bad.is_empty() || !unseen_good.is_empty() {
537                panic!("Test failed - see error output above");
538            }
539        } else {
540            eprintln!(
541                "\n✅ All {} good suggestions found, no bad suggestions\n",
542                found_good.len()
543            );
544        }
545    }
546
547    fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
548        let mut text_chars: Vec<char> = text.chars().collect();
549
550        let mut iter_count = 0;
551
552        loop {
553            let test = Document::new_from_vec(
554                text_chars.clone().into(),
555                &PlainEnglish,
556                &FstDictionary::curated(),
557            );
558            let lints = linter.lint(&test);
559
560            if let Some(lint) = lints.first() {
561                if let Some(sug) = lint.suggestions.get(n) {
562                    sug.apply(lint.span, &mut text_chars);
563
564                    let transformed_str: String = text_chars.iter().collect();
565                    dbg!(transformed_str);
566                } else {
567                    break;
568                }
569            } else {
570                break;
571            }
572
573            iter_count += 1;
574
575            if iter_count == 100 {
576                break;
577            }
578        }
579
580        eprintln!("Corrected {iter_count} times.");
581
582        text_chars.iter().collect()
583    }
584}