harper_core/linting/
mod.rs

1//! Frameworks and rules that locate errors in text.
2//!
3//! See the [`Linter`] trait and the [documentation for authoring a rule](https://writewithharper.com/docs/contributors/author-a-rule) for more information.
4
5mod a_part;
6mod a_while;
7mod addicting;
8mod adjective_double_degree;
9mod adjective_of_a;
10mod after_later;
11mod all_intents_and_purposes;
12mod allow_to;
13mod am_in_the_morning;
14mod amounts_for;
15mod an_a;
16mod and_in;
17mod and_the_like;
18mod another_thing_coming;
19mod another_think_coming;
20mod apart_from;
21mod ask_no_preposition;
22mod avoid_curses;
23mod back_in_the_day;
24mod be_allowed;
25mod best_of_all_time;
26mod boring_words;
27mod bought;
28mod brand_brandish;
29mod call_them;
30mod cant;
31mod capitalize_personal_pronouns;
32mod cautionary_tale;
33mod change_tack;
34mod chock_full;
35mod closed_compounds;
36mod comma_fixes;
37mod compound_nouns;
38mod compound_subject_i;
39mod confident;
40mod correct_number_suffix;
41mod criteria_phenomena;
42mod cure_for;
43mod currency_placement;
44mod dashes;
45mod despite_of;
46mod determiner_without_noun;
47mod didnt;
48mod discourse_markers;
49mod disjoint_prefixes;
50mod dot_initialisms;
51mod double_click;
52mod double_modal;
53mod ellipsis_length;
54mod else_possessive;
55mod ever_every;
56mod everyday;
57mod expand_memory_shorthands;
58mod expand_time_shorthands;
59mod expr_linter;
60mod far_be_it;
61mod fascinated_by;
62mod feel_fell;
63mod few_units_of_time_ago;
64mod filler_words;
65mod find_fine;
66mod first_aid_kit;
67mod for_noun;
68mod free_predicate;
69mod friend_of_me;
70mod go_so_far_as_to;
71mod handful;
72mod have_pronoun;
73mod have_take_a_look;
74mod hedging;
75mod hello_greeting;
76mod hereby;
77mod hop_hope;
78mod hope_youre;
79mod how_to;
80mod hyphenate_number_day;
81mod i_am_agreement;
82mod if_wouldve;
83mod in_on_the_cards;
84mod inflected_verb_after_to;
85mod initialism_linter;
86mod initialisms;
87mod interested_in;
88mod it_is;
89mod it_looks_like_that;
90mod it_would_be;
91mod its_contraction;
92mod its_possessive;
93mod jealous_of;
94mod johns_hopkins;
95mod left_right_hand;
96mod less_worse;
97mod let_to_do;
98mod lets_confusion;
99mod likewise;
100mod lint;
101mod lint_group;
102mod lint_kind;
103mod long_sentences;
104mod looking_forward_to;
105mod map_phrase_linter;
106mod map_phrase_set_linter;
107mod mass_nouns;
108mod merge_linters;
109mod merge_words;
110mod missing_preposition;
111mod missing_space;
112mod missing_to;
113mod misspell;
114mod mixed_bag;
115mod modal_be_adjective;
116mod modal_of;
117mod modal_seem;
118mod months;
119mod more_better;
120mod most_number;
121mod most_of_the_times;
122mod multiple_sequential_pronouns;
123mod nail_on_the_head;
124mod need_to_noun;
125mod no_french_spaces;
126mod no_match_for;
127mod no_oxford_comma;
128mod nobody;
129mod nominal_wants;
130mod noun_verb_confusion;
131mod number_suffix_capitalization;
132mod of_course;
133mod oldest_in_the_book;
134mod on_floor;
135mod once_or_twice;
136mod one_and_the_same;
137mod open_compounds;
138mod open_the_light;
139mod orthographic_consistency;
140mod ought_to_be;
141mod out_of_date;
142mod oxford_comma;
143mod oxymorons;
144mod phrasal_verb_as_compound_noun;
145mod phrase_corrections;
146mod phrase_set_corrections;
147mod pique_interest;
148mod possessive_noun;
149mod possessive_your;
150mod progressive_needs_be;
151mod pronoun_are;
152mod pronoun_contraction;
153mod pronoun_inflection_be;
154mod pronoun_knew;
155mod proper_noun_capitalization_linters;
156mod quantifier_needs_of;
157mod quantifier_numeral_conflict;
158mod quite_quiet;
159mod quote_spacing;
160mod redundant_acronyms;
161mod redundant_additive_adverbs;
162mod regionalisms;
163mod repeated_words;
164mod respond;
165mod right_click;
166mod roller_skated;
167mod safe_to_save;
168mod save_to_safe;
169mod semicolon_apostrophe;
170mod sentence_capitalization;
171mod shoot_oneself_in_the_foot;
172mod simple_past_to_past_participle;
173mod since_duration;
174mod single_be;
175mod some_without_article;
176mod something_is;
177mod somewhat_something;
178mod soon_to_be;
179mod sought_after;
180mod spaces;
181mod spell_check;
182mod spelled_numbers;
183mod split_words;
184mod subject_pronoun;
185mod suggestion;
186mod take_medicine;
187mod take_serious;
188mod that_than;
189mod that_which;
190mod the_how_why;
191mod the_my;
192mod then_than;
193mod theres;
194mod theses_these;
195mod thing_think;
196mod though_thought;
197mod throw_away;
198mod throw_rubbish;
199mod to_adverb;
200mod to_two_too;
201mod touristic;
202mod transposed_space;
203mod unclosed_quotes;
204mod update_place_names;
205mod use_genitive;
206mod use_title_case;
207mod verb_to_adjective;
208mod very_unique;
209mod vice_versa;
210mod was_aloud;
211mod way_too_adjective;
212mod well_educated;
213mod whereas;
214mod widely_accepted;
215mod win_prize;
216mod wish_could;
217mod wordpress_dotcom;
218mod would_never_have;
219
220pub use expr_linter::ExprLinter;
221pub use initialism_linter::InitialismLinter;
222pub use lint::Lint;
223pub use lint_group::{LintGroup, LintGroupConfig};
224pub use lint_kind::LintKind;
225pub use map_phrase_linter::MapPhraseLinter;
226pub use map_phrase_set_linter::MapPhraseSetLinter;
227pub use spell_check::SpellCheck;
228pub use suggestion::Suggestion;
229
230use crate::{Document, LSend, render_markdown};
231
/// A __stateless__ rule that searches documents for grammatical errors.
///
/// Commonly implemented via [`ExprLinter`].
///
/// Every implementor also receives [`HtmlDescriptionLinter`] through a blanket implementation.
///
/// See also: [`LintGroup`].
pub trait Linter: LSend {
    /// Analyzes a document and produces zero or more [`Lint`]s.
    ///
    /// `self` is passed mutably so that implementations can cache work between calls.
    fn lint(&mut self, document: &Document) -> Vec<Lint>;
    /// A user-facing description of what kinds of grammatical errors this rule looks for.
    /// It is usually shown in settings menus.
    ///
    /// The text is treated as Markdown by [`HtmlDescriptionLinter::description_html`].
    fn description(&self) -> &str;
}
245
/// A blanket-implemented trait that renders the Markdown description field of a linter to HTML.
///
/// It is implemented for every [`Linter`], so rules do not implement it by hand.
pub trait HtmlDescriptionLinter {
    /// Returns the linter's Markdown description rendered as an HTML string.
    fn description_html(&self) -> String;
}
250
251impl<L: ?Sized> HtmlDescriptionLinter for L
252where
253    L: Linter,
254{
255    fn description_html(&self) -> String {
256        let desc = self.description();
257        render_markdown(desc)
258    }
259}
260
261#[cfg(test)]
262pub mod tests {
263    use crate::parsers::Markdown;
264    use crate::{Document, Span, Token};
265    use hashbrown::HashSet;
266
    /// Extension trait for converting spans of tokens back to their original text
    pub trait SpanVecExt {
        /// Returns one `String` per span, holding the text that span covers in `doc`.
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }
271
272    impl SpanVecExt for Vec<Span<Token>> {
273        fn to_strings(&self, doc: &Document) -> Vec<String> {
274            self.iter()
275                .map(|sp| {
276                    doc.get_tokens()[sp.start..sp.end]
277                        .iter()
278                        .map(|tok| doc.get_span_content_str(&tok.span))
279                        .collect::<String>()
280                })
281                .collect()
282        }
283    }
284
285    use super::Linter;
286    use crate::spell::FstDictionary;
287
    /// Asserts that running `linter` over `text` produces no lints at all.
    ///
    /// Shorthand for [`assert_lint_count`] with an expected count of zero.
    #[track_caller]
    pub fn assert_no_lints(text: &str, linter: impl Linter) {
        assert_lint_count(text, linter, 0);
    }
292
293    #[track_caller]
294    pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) {
295        let test = Document::new_markdown_default_curated(text);
296        let lints = linter.lint(&test);
297        dbg!(&lints);
298        if lints.len() != count {
299            panic!(
300                "Expected \"{text}\" to create {count} lints, but it created {}.",
301                lints.len()
302            );
303        }
304    }
305
306    /// Assert the total number of suggestions produced by a [`Linter`], spread across all produced
307    /// [`Lint`]s.
308    #[track_caller]
309    pub fn assert_suggestion_count(text: &str, mut linter: impl Linter, count: usize) {
310        let test = Document::new_markdown_default_curated(text);
311        let lints = linter.lint(&test);
312        assert_eq!(
313            lints.iter().map(|l| l.suggestions.len()).sum::<usize>(),
314            count
315        );
316    }
317
    /// Runs a provided linter on text, applies the first suggestion from each lint
    /// and asserts whether the result is equal to a given value.
    ///
    /// Shorthand for [`assert_nth_suggestion_result`] with `n = 0`, so the corrected text must
    /// also produce no further lints.
    #[track_caller]
    pub fn assert_suggestion_result(text: &str, linter: impl Linter, expected_result: &str) {
        assert_nth_suggestion_result(text, linter, expected_result, 0);
    }
324
325    /// Runs a provided linter on text, applies the nth suggestion from each lint
326    /// and asserts whether the result is equal to a given value.
327    ///
328    /// Note that `n` starts at zero.
329    #[track_caller]
330    pub fn assert_nth_suggestion_result(
331        text: &str,
332        mut linter: impl Linter,
333        expected_result: &str,
334        n: usize,
335    ) {
336        let transformed_str = transform_nth_str(text, &mut linter, n);
337
338        if transformed_str.as_str() != expected_result {
339            panic!("Expected \"{expected_result}\"\n But got \"{transformed_str}\"");
340        }
341
342        // Applying the suggestions should fix all the lints.
343        assert_lint_count(&transformed_str, linter, 0);
344    }
345
346    #[track_caller]
347    pub fn assert_top3_suggestion_result(
348        text: &str,
349        mut linter: impl Linter,
350        expected_result: &str,
351    ) {
352        let zeroth = transform_nth_str(text, &mut linter, 0);
353        let first = transform_nth_str(text, &mut linter, 1);
354        let second = transform_nth_str(text, &mut linter, 2);
355
356        match (
357            zeroth.as_str() == expected_result,
358            first.as_str() == expected_result,
359            second.as_str() == expected_result,
360        ) {
361            (true, false, false) => assert_lint_count(&zeroth, linter, 0),
362            (false, true, false) => assert_lint_count(&first, linter, 0),
363            (false, false, true) => assert_lint_count(&second, linter, 0),
364            (false, false, false) => panic!(
365                "None of the top 3 suggestions produced the expected result:\n\
366                Expected: \"{expected_result}\"\n\
367                Got:\n\
368                [0]: \"{zeroth}\"\n\
369                [1]: \"{first}\"\n\
370                [2]: \"{second}\""
371            ),
372            // I think it's not possible for more than one suggestion to be correct
373            _ => {}
374        }
375    }
376
377    /// Asserts that none of the suggestions from the linter match the given text.
378    #[track_caller]
379    pub fn assert_not_in_suggestion_result(
380        text: &str,
381        mut linter: impl Linter,
382        bad_suggestion: &str,
383    ) {
384        let test = Document::new_markdown_default_curated(text);
385        let lints = linter.lint(&test);
386
387        for (i, lint) in lints.iter().enumerate() {
388            for (j, suggestion) in lint.suggestions.iter().enumerate() {
389                let mut text_chars: Vec<char> = text.chars().collect();
390                suggestion.apply(lint.span, &mut text_chars);
391                let suggestion_text: String = text_chars.into_iter().collect();
392
393                if suggestion_text == bad_suggestion {
394                    panic!(
395                        "Found undesired suggestion at lint[{i}].suggestions[{j}]:\n\
396                        Expected to not find suggestion: \"{bad_suggestion}\"\n\
397                        But found: \"{suggestion_text}\""
398                    );
399                }
400            }
401        }
402    }
403
404    /// Asserts both that the given text matches the expected good suggestions and that none of the
405    /// suggestions are in the bad suggestions list.
406    #[track_caller]
407    pub fn assert_good_and_bad_suggestions(
408        text: &str,
409        mut linter: impl Linter,
410        good: &[&str],
411        bad: &[&str],
412    ) {
413        let test = Document::new_markdown_default_curated(text);
414        let lints = linter.lint(&test);
415
416        let mut unseen_good: HashSet<_> = good.iter().cloned().collect();
417        let mut found_bad = Vec::new();
418        let mut found_good = Vec::new();
419
420        for (i, lint) in lints.into_iter().enumerate() {
421            for (j, suggestion) in lint.suggestions.into_iter().enumerate() {
422                let mut text_chars: Vec<char> = text.chars().collect();
423                suggestion.apply(lint.span, &mut text_chars);
424                let suggestion_text: String = text_chars.into_iter().collect();
425
426                // Check for bad suggestions
427                if bad.contains(&&*suggestion_text) {
428                    found_bad.push((i, j, suggestion_text.clone()));
429                    eprintln!(
430                        "  ❌ Found bad suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
431                    );
432                }
433                // Check for good suggestions
434                else if good.contains(&&*suggestion_text) {
435                    found_good.push((i, j, suggestion_text.clone()));
436                    eprintln!(
437                        "  ✅ Found good suggestion at lint[{i}].suggestions[{j}]: \"{suggestion_text}\""
438                    );
439                    unseen_good.remove(suggestion_text.as_str());
440                }
441            }
442        }
443
444        // Print summary
445        if !found_bad.is_empty() || !unseen_good.is_empty() {
446            eprintln!("\n=== Test Summary ===");
447
448            // In the summary section, change these loops:
449            if !found_bad.is_empty() {
450                eprintln!("\n❌ Found {} bad suggestions:", found_bad.len());
451                for (i, j, text) in &found_bad {
452                    eprintln!("  - lint[{i}].suggestions[{j}]: \"{text}\"");
453                }
454            }
455
456            // And for the good suggestions:
457            if !unseen_good.is_empty() {
458                eprintln!(
459                    "\n❌ Missing {} expected good suggestions:",
460                    unseen_good.len()
461                );
462                for text in &unseen_good {
463                    eprintln!("  - \"{text}\"");
464                }
465            }
466
467            eprintln!("\n✅ Found {} good suggestions", found_good.len());
468            eprintln!("==================\n");
469
470            if !found_bad.is_empty() || !unseen_good.is_empty() {
471                panic!("Test failed - see error output above");
472            }
473        } else {
474            eprintln!(
475                "\n✅ All {} good suggestions found, no bad suggestions\n",
476                found_good.len()
477            );
478        }
479    }
480
481    /// Asserts that the lint's message matches the expected message.
482    #[track_caller]
483    pub fn assert_lint_message(text: &str, mut linter: impl Linter, expected_message: &str) {
484        let test = Document::new_markdown_default_curated(text);
485        let lints = linter.lint(&test);
486
487        // Just check the first lint for now
488        if let Some(lint) = lints.first() {
489            if lint.message != expected_message {
490                panic!(
491                    "Expected lint message \"{expected_message}\", but got \"{}\"",
492                    lint.message
493                );
494            }
495        }
496    }
497
498    fn transform_nth_str(text: &str, linter: &mut impl Linter, n: usize) -> String {
499        let mut text_chars: Vec<char> = text.chars().collect();
500
501        let mut iter_count = 0;
502
503        loop {
504            let test = Document::new_from_vec(
505                text_chars.clone().into(),
506                &Markdown::default(),
507                &FstDictionary::curated(),
508            );
509            let lints = linter.lint(&test);
510
511            if let Some(lint) = lints.first() {
512                if let Some(sug) = lint.suggestions.get(n) {
513                    sug.apply(lint.span, &mut text_chars);
514
515                    let transformed_str: String = text_chars.iter().collect();
516                    dbg!(transformed_str);
517                } else {
518                    break;
519                }
520            } else {
521                break;
522            }
523
524            iter_count += 1;
525
526            if iter_count == 100 {
527                break;
528            }
529        }
530
531        eprintln!("Corrected {iter_count} times.");
532
533        text_chars.iter().collect()
534    }
535}
harper_core/linting/mod.rs

harper_core/linting/
mod.rs