harper_core/linting/
mod.rs

1//! Frameworks and rules that locate errors in text.
2//!
3//! See the [`Linter`] trait and the [documentation for authoring a rule](https://writewithharper.com/docs/contributors/author-a-rule) for more information.
4
5mod a_part;
6mod addicting;
7mod adjective_double_degree;
8mod adjective_of_a;
9mod after_later;
10mod all_intents_and_purposes;
11mod allow_to;
12mod am_in_the_morning;
13mod amounts_for;
14mod an_a;
15mod another_thing_coming;
16mod another_think_coming;
17mod ask_no_preposition;
18mod avoid_curses;
19mod back_in_the_day;
20mod be_allowed;
21mod best_of_all_time;
22mod boring_words;
23mod bought;
24mod call_them;
25mod cant;
26mod capitalize_personal_pronouns;
27mod cautionary_tale;
28mod change_tack;
29mod chock_full;
30mod closed_compounds;
31mod comma_fixes;
32mod compound_nouns;
33mod compound_subject_i;
34mod confident;
35mod correct_number_suffix;
36mod criteria_phenomena;
37mod currency_placement;
38mod dashes;
39mod despite_of;
40mod determiner_without_noun;
41mod didnt;
42mod discourse_markers;
43mod dot_initialisms;
44mod double_click;
45mod double_modal;
46mod ellipsis_length;
47mod else_possessive;
48mod everyday;
49mod expand_memory_shorthands;
50mod expand_time_shorthands;
51mod expr_linter;
52mod far_be_it;
53mod feel_fell;
54mod few_units_of_time_ago;
55mod filler_words;
56mod first_aid_kit;
57mod for_noun;
58mod free_predicate;
59mod friend_of_me;
60mod have_pronoun;
61mod have_take_a_look;
62mod hedging;
63mod hello_greeting;
64mod hereby;
65mod hop_hope;
66mod hope_youre;
67mod how_to;
68mod hyphenate_number_day;
69mod i_am_agreement;
70mod in_on_the_cards;
71mod inflected_verb_after_to;
72mod initialism_linter;
73mod initialisms;
74mod interested_in;
75mod it_is;
76mod it_looks_like_that;
77mod it_would_be;
78mod its_contraction;
79mod its_possessive;
80mod left_right_hand;
81mod less_worse;
82mod let_to_do;
83mod lets_confusion;
84mod likewise;
85mod lint;
86mod lint_group;
87mod lint_kind;
88mod long_sentences;
89mod looking_forward_to;
90mod map_phrase_linter;
91mod map_phrase_set_linter;
92mod mass_plurals;
93mod merge_linters;
94mod merge_words;
95mod missing_preposition;
96mod missing_to;
97mod misspell;
98mod mixed_bag;
99mod modal_of;
100mod modal_seem;
101mod months;
102mod more_better;
103mod most_number;
104mod most_of_the_times;
105mod multiple_sequential_pronouns;
106mod nail_on_the_head;
107mod no_french_spaces;
108mod no_match_for;
109mod no_oxford_comma;
110mod nobody;
111mod nominal_wants;
112mod noun_countability;
113mod noun_verb_confusion;
114mod number_suffix_capitalization;
115mod of_course;
116mod on_floor;
117mod once_or_twice;
118mod one_and_the_same;
119mod open_compounds;
120mod open_the_light;
121mod orthographic_consistency;
122mod ought_to_be;
123mod out_of_date;
124mod oxford_comma;
125mod oxymorons;
126mod phrasal_verb_as_compound_noun;
127mod phrase_corrections;
128mod phrase_set_corrections;
129mod pique_interest;
130mod possessive_noun;
131mod possessive_your;
132mod progressive_needs_be;
133mod pronoun_are;
134mod pronoun_contraction;
135mod pronoun_inflection_be;
136mod pronoun_knew;
137mod proper_noun_capitalization_linters;
138mod quantifier_needs_of;
139mod quite_quiet;
140mod quote_spacing;
141mod redundant_additive_adverbs;
142mod regionalisms;
143mod repeated_words;
144mod roller_skated;
145mod safe_to_save;
146mod save_to_safe;
147mod semicolon_apostrophe;
148mod sentence_capitalization;
149mod shoot_oneself_in_the_foot;
150mod simple_past_to_past_participle;
151mod since_duration;
152mod some_without_article;
153mod something_is;
154mod somewhat_something;
155mod sought_after;
156mod spaces;
157mod spell_check;
158mod spelled_numbers;
159mod split_words;
160mod suggestion;
161mod take_serious;
162mod that_than;
163mod that_which;
164mod the_how_why;
165mod the_my;
166mod then_than;
167mod theres;
168mod thing_think;
169mod though_thought;
170mod throw_away;
171mod throw_rubbish;
172mod to_adverb;
173mod to_two_too;
174mod touristic;
175mod unclosed_quotes;
176mod update_place_names;
177mod use_genitive;
178mod verb_to_adjective;
179mod very_unique;
180mod vice_versa;
181mod was_aloud;
182mod way_too_adjective;
183mod well_educated;
184mod whereas;
185mod widely_accepted;
186mod win_prize;
187mod wordpress_dotcom;
188mod would_never_have;
189
190pub use a_part::APart;
191pub use addicting::Addicting;
192pub use adjective_double_degree::AdjectiveDoubleDegree;
193pub use adjective_of_a::AdjectiveOfA;
194pub use after_later::AfterLater;
195pub use all_intents_and_purposes::AllIntentsAndPurposes;
196pub use allow_to::AllowTo;
197pub use am_in_the_morning::AmInTheMorning;
198pub use amounts_for::AmountsFor;
199pub use an_a::AnA;
200pub use another_thing_coming::AnotherThingComing;
201pub use another_think_coming::AnotherThinkComing;
202pub use ask_no_preposition::AskNoPreposition;
203pub use avoid_curses::AvoidCurses;
204pub use back_in_the_day::BackInTheDay;
205pub use be_allowed::BeAllowed;
206pub use best_of_all_time::BestOfAllTime;
207pub use boring_words::BoringWords;
208pub use bought::Bought;
209pub use cant::Cant;
210pub use capitalize_personal_pronouns::CapitalizePersonalPronouns;
211pub use cautionary_tale::CautionaryTale;
212pub use change_tack::ChangeTack;
213pub use chock_full::ChockFull;
214pub use comma_fixes::CommaFixes;
215pub use compound_nouns::CompoundNouns;
216pub use compound_subject_i::CompoundSubjectI;
217pub use confident::Confident;
218pub use correct_number_suffix::CorrectNumberSuffix;
219pub use criteria_phenomena::CriteriaPhenomena;
220pub use currency_placement::CurrencyPlacement;
221pub use dashes::Dashes;
222pub use despite_of::DespiteOf;
223pub use didnt::Didnt;
224pub use discourse_markers::DiscourseMarkers;
225pub use dot_initialisms::DotInitialisms;
226pub use double_click::DoubleClick;
227pub use double_modal::DoubleModal;
228pub use ellipsis_length::EllipsisLength;
229pub use everyday::Everyday;
230pub use expand_memory_shorthands::ExpandMemoryShorthands;
231pub use expand_time_shorthands::ExpandTimeShorthands;
232pub use expr_linter::ExprLinter;
233pub use far_be_it::FarBeIt;
234pub use feel_fell::FeelFell;
235pub use few_units_of_time_ago::FewUnitsOfTimeAgo;
236pub use filler_words::FillerWords;
237pub use for_noun::ForNoun;
238pub use free_predicate::FreePredicate;
239pub use friend_of_me::FriendOfMe;
240pub use have_pronoun::HavePronoun;
241pub use have_take_a_look::HaveTakeALook;
242pub use hedging::Hedging;
243pub use hello_greeting::HelloGreeting;
244pub use hereby::Hereby;
245pub use hop_hope::HopHope;
246pub use how_to::HowTo;
247pub use hyphenate_number_day::HyphenateNumberDay;
248pub use i_am_agreement::IAmAgreement;
249pub use in_on_the_cards::InOnTheCards;
250pub use inflected_verb_after_to::InflectedVerbAfterTo;
251pub use initialism_linter::InitialismLinter;
252pub use interested_in::InterestedIn;
253pub use it_looks_like_that::ItLooksLikeThat;
254pub use its_contraction::ItsContraction;
255pub use its_possessive::ItsPossessive;
256pub use left_right_hand::LeftRightHand;
257pub use less_worse::LessWorse;
258pub use let_to_do::LetToDo;
259pub use lets_confusion::LetsConfusion;
260pub use likewise::Likewise;
261pub use lint::Lint;
262pub use lint_group::{LintGroup, LintGroupConfig};
263pub use lint_kind::LintKind;
264pub use long_sentences::LongSentences;
265pub use looking_forward_to::LookingForwardTo;
266pub use map_phrase_linter::MapPhraseLinter;
267pub use map_phrase_set_linter::MapPhraseSetLinter;
268pub use mass_plurals::MassPlurals;
269pub use merge_words::MergeWords;
270pub use missing_preposition::MissingPreposition;
271pub use missing_to::MissingTo;
272pub use misspell::Misspell;
273pub use mixed_bag::MixedBag;
274pub use modal_of::ModalOf;
275pub use modal_seem::ModalSeem;
276pub use months::Months;
277pub use more_better::MoreBetter;
278pub use most_number::MostNumber;
279pub use most_of_the_times::MostOfTheTimes;
280pub use multiple_sequential_pronouns::MultipleSequentialPronouns;
281pub use nail_on_the_head::NailOnTheHead;
282pub use no_french_spaces::NoFrenchSpaces;
283pub use no_match_for::NoMatchFor;
284pub use no_oxford_comma::NoOxfordComma;
285pub use nobody::Nobody;
286pub use noun_countability::NounCountability;
287pub use noun_verb_confusion::NounVerbConfusion;
288pub use number_suffix_capitalization::NumberSuffixCapitalization;
289pub use of_course::OfCourse;
290pub use on_floor::OnFloor;
291pub use once_or_twice::OnceOrTwice;
292pub use one_and_the_same::OneAndTheSame;
293pub use open_the_light::OpenTheLight;
294pub use orthographic_consistency::OrthographicConsistency;
295pub use ought_to_be::OughtToBe;
296pub use out_of_date::OutOfDate;
297pub use oxford_comma::OxfordComma;
298pub use oxymorons::Oxymorons;
299pub use phrasal_verb_as_compound_noun::PhrasalVerbAsCompoundNoun;
300pub use pique_interest::PiqueInterest;
301pub use possessive_noun::PossessiveNoun;
302pub use possessive_your::PossessiveYour;
303pub use progressive_needs_be::ProgressiveNeedsBe;
304pub use pronoun_are::PronounAre;
305pub use pronoun_contraction::PronounContraction;
306pub use pronoun_inflection_be::PronounInflectionBe;
307pub use quantifier_needs_of::QuantifierNeedsOf;
308pub use quite_quiet::QuiteQuiet;
309pub use quote_spacing::QuoteSpacing;
310pub use redundant_additive_adverbs::RedundantAdditiveAdverbs;
311pub use regionalisms::Regionalisms;
312pub use repeated_words::RepeatedWords;
313pub use roller_skated::RollerSkated;
314pub use safe_to_save::SafeToSave;
315pub use save_to_safe::SaveToSafe;
316pub use semicolon_apostrophe::SemicolonApostrophe;
317pub use sentence_capitalization::SentenceCapitalization;
318pub use shoot_oneself_in_the_foot::ShootOneselfInTheFoot;
319pub use simple_past_to_past_participle::SimplePastToPastParticiple;
320pub use since_duration::SinceDuration;
321pub use some_without_article::SomeWithoutArticle;
322pub use something_is::SomethingIs;
323pub use somewhat_something::SomewhatSomething;
324pub use sought_after::SoughtAfter;
325pub use spaces::Spaces;
326pub use spell_check::SpellCheck;
327pub use spelled_numbers::SpelledNumbers;
328pub use split_words::SplitWords;
329pub use suggestion::Suggestion;
330pub use take_serious::TakeSerious;
331pub use that_than::ThatThan;
332pub use that_which::ThatWhich;
333pub use the_how_why::TheHowWhy;
334pub use the_my::TheMy;
335pub use then_than::ThenThan;
336pub use theres::Theres;
337pub use thing_think::ThingThink;
338pub use though_thought::ThoughThought;
339pub use throw_away::ThrowAway;
340pub use throw_rubbish::ThrowRubbish;
341pub use to_adverb::ToAdverb;
342pub use to_two_too::ToTwoToo;
343pub use touristic::Touristic;
344pub use unclosed_quotes::UnclosedQuotes;
345pub use update_place_names::UpdatePlaceNames;
346pub use use_genitive::UseGenitive;
347pub use verb_to_adjective::VerbToAdjective;
348pub use very_unique::VeryUnique;
349pub use vice_versa::ViceVersa;
350pub use was_aloud::WasAloud;
351pub use way_too_adjective::WayTooAdjective;
352pub use well_educated::WellEducated;
353pub use whereas::Whereas;
354pub use widely_accepted::WidelyAccepted;
355pub use win_prize::WinPrize;
356pub use wordpress_dotcom::WordPressDotcom;
357pub use would_never_have::WouldNeverHave;
358
359use crate::{Document, LSend, render_markdown};
360
361/// A __stateless__ rule that searches documents for grammatical errors.
362///
363/// Commonly implemented via [`ExprLinter`].
364///
365/// See also: [`LintGroup`].
366pub trait Linter: LSend {
367    /// Analyzes a document and produces zero or more [`Lint`]s.
368    /// We pass `self` mutably for caching purposes.
369    fn lint(&mut self, document: &Document) -> Vec<Lint>;
370    /// A user-facing description of what kinds of grammatical errors this rule looks for.
371    /// It is usually shown in settings menus.
372    fn description(&self) -> &str;
373}
374
/// Renders the Markdown description of a linter to HTML.
///
/// This trait is blanket-implemented, so it does not need to be implemented by hand.
pub trait HtmlDescriptionLinter {
    /// Returns the linter's description rendered from Markdown to HTML.
    fn description_html(&self) -> String;
}
379
380impl<L: ?Sized> HtmlDescriptionLinter for L
381where
382    L: Linter,
383{
384    fn description_html(&self) -> String {
385        let desc = self.description();
386        render_markdown(desc)
387    }
388}
389
390#[cfg(test)]
391pub mod tests {
392    use crate::{Document, Span, Token, parsers::PlainEnglish};
393    use hashbrown::HashSet;
394
395    /// Extension trait for converting spans of tokens back to their original text
396    pub trait SpanVecExt {
397        fn to_strings(&self, doc: &Document) -> Vec<String>;
398    }
399
400    impl SpanVecExt for Vec<Span<Token>> {
401        fn to_strings(&self, doc: &Document) -> Vec<String> {
402            self.iter()
403                .map(|sp| {
404                    doc.get_tokens()[sp.start..sp.end]
405                        .iter()
406                        .map(|tok| doc.get_span_content_str(&tok.span))
407                        .collect::<String>()
408                })
409                .collect()
410        }
411    }
412
413    use super::Linter;
414    use crate::spell::FstDictionary;
415
416    #[track_caller]
417    pub fn assert_no_lints(text: &str, linter: impl Linter) {
418        assert_lint_count(text, linter, 0);
419    }
420
421    #[track_caller]
422    pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) {
423        let test = Document::new_markdown_default_curated(text);
424        let lints = linter.lint(&test);
425        dbg!(&lints);
426        if lints.len() != count {
427            panic!(
428                "Expected \"{text}\" to create {count} lints, but it created {}.",
429                lints.len()
430            );
431        }
432    }
433
434    /// Assert the total number of suggestions produced by a [`Linter`], spread across all produced
435    /// [`Lint`]s.
436    #[track_caller]
437    pub fn assert_suggestion_count(text: &str, mut linter: impl Linter, count: usize) {
438        let test = Document::new_markdown_default_curated(text);
439        let lints = linter.lint(&test);
440        assert_eq!(
441            lints.iter().map(|l| l.suggestions.len()).sum::<usize>(),
442            count
443        );
444    }
445
446    /// Runs a provided linter on text, applies the first suggestion from each lint
447    /// and asserts whether the result is equal to a given value.
448    #[track_caller]
449    pub fn assert_suggestion_result(text: &str, linter: impl Linter, expected_result: &str) {
450        assert_nth_suggestion_result(text, linter, expected_result, 0);
451    }
452
453    /// Runs a provided linter on text, applies the nth suggestion from each lint
454    /// and asserts whether the result is equal to a given value.
455    ///
456    /// Note that `n` starts at zero.
457    #[track_caller]
458    pub fn assert_nth_suggestion_result(
459        text: &str,
460        mut linter: impl Linter,
461        expected_result: &str,
462        n: usize,
463    ) {
464        let transformed_str = transform_nth_str(text, &mut linter, n);
465
466        if transformed_str.as_str() != expected_result {
467            panic!(
468                "Expected \"{transformed_str}\" to be \"{expected_result}\" after applying the computed suggestions."
469            );
470        }
471
472        // Applying the suggestions should fix all the lints.
473        assert_lint_count(&transformed_str, linter, 0);
474    }
475
476    #[track_caller]
477    pub fn assert_top3_suggestion_result(
478        text: &str,
479        mut linter: impl Linter,
480        expected_result: &str,
481    ) {
482        let zeroth = transform_nth_str(text, &mut linter, 0);
483        let first = transform_nth_str(text, &mut linter, 1);
484        let second = transform_nth_str(text, &mut linter, 2);
485
486        match (
487            zeroth.as_str() == expected_result,
488            first.as_str() == expected_result,
489            second.as_str() == expected_result,
490        ) {
491            (true, false, false) => assert_lint_count(&zeroth, linter, 0),
492            (false, true, false) => assert_lint_count(&first, linter, 0),
493            (false, false, true) => assert_lint_count(&second, linter, 0),
494            (false, false, false) => panic!(
495                "None of the top 3 suggestions produced the expected result:\n\
496                Expected: \"{expected_result}\"\n\
497                Got:\n\
498                [0]: \"{zeroth}\"\n\
499                [1]: \"{first}\"\n\
500                [2]: \"{second}\""
501            ),
502            // I think it's not possible for more than one suggestion to be correct
503            _ => {}
504        }
505    }
506
507    /// Asserts that none of the suggestions from the linter match the given text.
508    #[track_caller]
509    pub fn assert_not_in_suggestion_result(
510        text: &str,
511        mut linter: impl Linter,
512        bad_suggestion: &str,
513    ) {
514        let test = Document::new_markdown_default_curated(text);
515        let lints = linter.lint(&test);
516
517        for (i, lint) in lints.iter().enumerate() {
518            for (j, suggestion) in lint.suggestions.iter().enumerate() {
519                let mut text_chars: Vec<char> = text.chars().collect();
520                suggestion.apply(lint.span, &mut text_chars);
521                let suggestion_text: String = text_chars.into_iter().collect();
522
523                if suggestion_text == bad_suggestion {
524                    panic!(
525                        "Found undesired suggestion at lint[{i}].suggestions[{j}]:\n\
526                        Expected to not find suggestion: \"{bad_suggestion}\"\n\
527                        But found: \"{suggestion_text}\""
528                    );
529                }
530            }
531        }
532    }
533
534    /// Asserts both that the given text matches the expected good suggestions and that none of the
535    /// suggestions are in the bad suggestions list.
536    #[track_caller]
537    pub fn assert_good_and_bad_suggestions(
538        text: &str,
539        mut linter: impl Linter,
540        good: &[&str],
541        bad: &[&str],
542    ) {
543        let test = Document::new_markdown_default_curated(text);
544        let lints = linter.lint(&test);
545
546        let mut unseen_good: HashSet<_> = good.iter().cloned().collect();
547        let mut found_bad = Vec::new();
548        let mut found_good = Vec::new();
549
550        for (i, lint) in lints.into_iter().enumerate() {
551            for (j, suggestion) in lint.suggestions.into_iter().enumerate() {
552                let mut text_chars: Vec<char> = text.chars().collect();
553                suggestion.apply(lint.span, &mut text_chars);
554                let suggestion_text: String = text_chars.into_iter().collect();
555
556                // Check for bad suggestions
557                if bad.contains(&&*suggestion_text) {
558                    found_bad.push((i, j, suggestion_text.clone()));
559                    eprintln!(
560                        "  ❌ Found bad suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
561                    );
562                }
563                // Check for good suggestions
564                else if good.contains(&&*suggestion_text) {
565                    found_good.push((i, j, suggestion_text.clone()));
566                    eprintln!(
567                        "  ✅ Found good suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
568                    );
569                    unseen_good.remove(suggestion_text.as_str());
570                }
571            }
572        }
573
574        // Print summary
575        if !found_bad.is_empty() || !unseen_good.is_empty() {
576            eprintln!("\n=== Test Summary ===");
577
578            // In the summary section, change these loops:
579            if !found_bad.is_empty() {
580                eprintln!("\n❌ Found {} bad suggestions:", found_bad.len());
581                for (i, j, text) in &found_bad {
582                    eprintln!("  - lint[{i}].suggestions[{j}]: \"{text}\"");
583                }
584            }
585
586            // And for the good suggestions:
587            if !unseen_good.is_empty() {
588                eprintln!(
589                    "\n❌ Missing {} expected good suggestions:",
590                    unseen_good.len()
591                );
592                for text in &unseen_good {
593                    eprintln!("  - \"{text}\"");
594                }
595            }
596
597            eprintln!("\n✅ Found {} good suggestions", found_good.len());
598            eprintln!("==================\n");
599
600            if !found_bad.is_empty() || !unseen_good.is_empty() {
601                panic!("Test failed - see error output above");
602            }
603        } else {
604            eprintln!(
605                "\n✅ All {} good suggestions found, no bad suggestions\n",
606                found_good.len()
607            );
608        }
609    }
610
611    fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
612        let mut text_chars: Vec<char> = text.chars().collect();
613
614        let mut iter_count = 0;
615
616        loop {
617            let test = Document::new_from_vec(
618                text_chars.clone().into(),
619                &PlainEnglish,
620                &FstDictionary::curated(),
621            );
622            let lints = linter.lint(&test);
623
624            if let Some(lint) = lints.first() {
625                if let Some(sug) = lint.suggestions.get(n) {
626                    sug.apply(lint.span, &mut text_chars);
627
628                    let transformed_str: String = text_chars.iter().collect();
629                    dbg!(transformed_str);
630                } else {
631                    break;
632                }
633            } else {
634                break;
635            }
636
637            iter_count += 1;
638
639            if iter_count == 100 {
640                break;
641            }
642        }
643
644        eprintln!("Corrected {iter_count} times.");
645
646        text_chars.iter().collect()
647    }
648}