Skip to main content

harper_core/
lib.rs

1#![doc = include_str!("../README.md")]
2#![allow(dead_code)]
3
4mod case;
5mod char_ext;
6mod char_string;
7mod currency;
8mod dict_word_metadata;
9mod dict_word_metadata_orthography;
10mod document;
11mod edit_distance;
12pub mod expr;
13mod fat_token;
14mod ignored_lints;
15mod indefinite_article;
16mod irregular_nouns;
17mod irregular_verbs;
18pub mod language_detection;
19mod lexing;
20pub mod linting;
21mod mask;
22mod number;
23mod offsets;
24pub mod parsers;
25pub mod patterns;
26mod punctuation;
27mod regular_nouns;
28mod render_markdown;
29mod span;
30pub mod spell;
31mod sync;
32mod thesaurus_helper;
33mod title_case;
34mod token;
35mod token_kind;
36mod token_string_ext;
37mod vec_ext;
38pub mod weir;
39pub mod weirpack;
40
41use render_markdown::render_markdown;
42use std::collections::{BTreeMap, VecDeque};
43
44pub use case::{Case, CaseIterExt};
45pub use char_string::{CharString, CharStringExt};
46pub use currency::Currency;
47pub use dict_word_metadata::{
48    AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DialectFlags, DictWordMetadata,
49    NounData, PronounData, VerbData, VerbForm, VerbFormFlags,
50};
51pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
52pub use document::Document;
53pub use fat_token::{FatStringToken, FatToken};
54pub use ignored_lints::{IgnoredLints, LintContext};
55pub use indefinite_article::{InitialSound, starts_with_vowel};
56pub use irregular_nouns::IrregularNouns;
57pub use irregular_verbs::IrregularVerbs;
58use linting::Lint;
59pub use mask::{Mask, Masker, RegexMasker};
60pub use number::{Number, OrdinalSuffix};
61pub use punctuation::{Punctuation, Quote};
62pub use regular_nouns::{get_plurals, get_singulars};
63pub use span::Span;
64pub use sync::{LSend, Lrc};
65pub use title_case::{make_title_case, make_title_case_str};
66pub use token::Token;
67pub use token_kind::TokenKind;
68pub use token_string_ext::TokenStringExt;
69pub use vec_ext::VecExt;
70
71/// Return `harper-core` version
72pub fn core_version() -> &'static str {
73    env!("CARGO_PKG_VERSION")
74}
75
76/// A utility function that removes overlapping lints in a vector,
77/// keeping the more important ones.
78///
79/// Note: this function will change the ordering of the lints.
80pub fn remove_overlaps(lints: &mut Vec<Lint>) {
81    if lints.len() < 2 {
82        return;
83    }
84
85    let mut remove_indices = VecDeque::new();
86    lints.sort_by_key(|l| l.priority);
87    lints.sort_by_key(|l| (l.span.start, !0 - l.span.end));
88
89    let mut cur = 0;
90
91    for (i, lint) in lints.iter().enumerate() {
92        if lint.span.start < cur {
93            remove_indices.push_back(i);
94            continue;
95        }
96        cur = lint.span.end;
97    }
98
99    lints.remove_indices(remove_indices);
100}
101
102/// Remove lints whose character spans overlap any nonempty match of an expression.
103///
104/// This is useful for letting higher-level token patterns mark text ranges where otherwise valid
105/// lower-level lints should be suppressed. Expression matches are checked from every token index,
106/// including overlapping matches, and zero-width expression matches are ignored.
107pub fn remove_lints_overlapping_expr<E: expr::Expr + ?Sized>(
108    expr: &E,
109    document: &Document,
110    lints: &mut Vec<Lint>,
111) {
112    if lints.is_empty() {
113        return;
114    }
115
116    let tokens = document.get_tokens();
117    let source = document.get_source();
118    let matched_spans: Vec<Span<char>> = (0..tokens.len())
119        .filter_map(|cursor| {
120            let token_span = expr.run(cursor, tokens, source)?;
121
122            if token_span.is_empty() {
123                None
124            } else {
125                Some(token_span.to_char_span(tokens))
126            }
127        })
128        .collect();
129
130    if matched_spans.is_empty() {
131        return;
132    }
133
134    lints.retain(|lint| {
135        !matched_spans
136            .iter()
137            .any(|matched_span| lint.span.overlaps_with(*matched_span))
138    });
139}
140
141/// Remove overlapping lints from a map keyed by rule name, similar to [`remove_overlaps`].
142///
143/// The map is treated as if all contained lints were in a single flat collection, ensuring the
144/// same lint would be kept regardless of whether it originated from `lint` or `organized_lints`.
145pub fn remove_overlaps_map<K: Ord>(lint_map: &mut BTreeMap<K, Vec<Lint>>) {
146    let total: usize = lint_map.values().map(Vec::len).sum();
147    if total < 2 {
148        return;
149    }
150
151    struct IndexedSpan {
152        rule_idx: usize,
153        lint_idx: usize,
154        priority: u8,
155        start: usize,
156        end: usize,
157    }
158
159    let mut removal_flags: Vec<Vec<bool>> = lint_map
160        .values()
161        .map(|lints| vec![false; lints.len()])
162        .collect();
163
164    let mut spans = Vec::with_capacity(total);
165    for (rule_idx, (_, lints)) in lint_map.iter().enumerate() {
166        for (lint_idx, lint) in lints.iter().enumerate() {
167            spans.push(IndexedSpan {
168                priority: lint.priority,
169                rule_idx,
170                lint_idx,
171                start: lint.span.start,
172                end: lint.span.end,
173            });
174        }
175    }
176
177    spans.sort_by_key(|span| span.priority);
178    spans.sort_by_key(|span| (span.start, usize::MAX - span.end));
179
180    let mut cur = 0;
181    for span in spans {
182        if span.start < cur {
183            removal_flags[span.rule_idx][span.lint_idx] = true;
184        } else {
185            cur = span.end;
186        }
187    }
188
189    for (rule_idx, (_, lints)) in lint_map.iter_mut().enumerate() {
190        if removal_flags[rule_idx].iter().all(|flag| !*flag) {
191            continue;
192        }
193
194        let mut idx = 0;
195        lints.retain(|_| {
196            let remove = removal_flags[rule_idx][idx];
197            idx += 1;
198            !remove
199        });
200    }
201}
202
#[cfg(test)]
mod tests {
    use std::hash::DefaultHasher;
    use std::hash::{Hash, Hasher};

    use itertools::Itertools;
    use quickcheck_macros::quickcheck;

    use crate::linting::Lint;
    use crate::remove_overlaps_map;
    use crate::spell::FstDictionary;
    use crate::{
        Dialect, Document, Span,
        expr::{AnchorStart, SequenceExpr},
        linting::{LintGroup, Linter},
        remove_lints_overlapping_expr, remove_overlaps,
    };

    /// Linting "Ths  tet" with the curated group and removing overlaps should leave three lints.
    #[test]
    fn keeps_space_lint() {
        let doc = Document::new_plain_english_curated("Ths  tet");

        let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

        let mut lints = linter.lint(&doc);

        remove_overlaps(&mut lints);

        // Print the surviving lints only when the expectation fails, instead of on every run.
        assert_eq!(
            lints.len(),
            3,
            "unexpected lints after overlap removal: {:#?}",
            lints
        );
    }

    /// A lint covering characters 5..8 ("bad") is removed when the expression matches "bad".
    #[test]
    fn remove_lints_overlapping_expr_removes_overlapping_lints() {
        let doc = Document::new_plain_english_curated("keep bad keep");
        let mut lints = vec![Lint {
            span: Span::new(5, 8),
            ..Default::default()
        }];

        remove_lints_overlapping_expr(&SequenceExpr::aco("bad"), &doc, &mut lints);

        assert!(lints.is_empty());
    }

    /// A lint covering characters 0..4 (the first "keep") survives a match on "bad".
    #[test]
    fn remove_lints_overlapping_expr_keeps_non_overlapping_lints() {
        let doc = Document::new_plain_english_curated("keep bad keep");
        let mut lints = vec![Lint {
            span: Span::new(0, 4),
            ..Default::default()
        }];

        remove_lints_overlapping_expr(&SequenceExpr::aco("bad"), &doc, &mut lints);

        assert_eq!(lints.len(), 1);
    }

    /// Running `AnchorStart` as the expression must not remove a lint covering the whole text.
    #[test]
    fn remove_lints_overlapping_expr_ignores_zero_width_matches() {
        let doc = Document::new_plain_english_curated("bad");
        let mut lints = vec![Lint {
            span: Span::new(0, 3),
            ..Default::default()
        }];

        remove_lints_overlapping_expr(&AnchorStart, &doc, &mut lints);

        assert_eq!(lints.len(), 1);
    }

    /// `remove_overlaps_map` and `remove_overlaps` must keep the same set of lints for any input.
    #[quickcheck]
    fn overlap_removals_have_equivalent_behavior(s: String) {
        let doc = Document::new_plain_english_curated(&s);
        let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

        let mut lint_map = linter.organized_lints(&doc);
        let mut lint_flat: Vec<_> = lint_map.values().flatten().cloned().collect();

        remove_overlaps_map(&mut lint_map);
        remove_overlaps(&mut lint_flat);

        let post_removal_flat: Vec<_> = lint_map.values().flatten().cloned().collect();

        fn hash_lint(lint: &Lint) -> u64 {
            let mut hasher = DefaultHasher::new();
            lint.hash(&mut hasher);
            hasher.finish()
        }

        // We want to ignore ordering, so let us hash these first and sort them.
        let lint_flat_hashes: Vec<_> = lint_flat.iter().map(hash_lint).sorted().collect();
        let post_removal_flat_hashes: Vec<_> =
            post_removal_flat.iter().map(hash_lint).sorted().collect();

        assert_eq!(post_removal_flat_hashes, lint_flat_hashes);
    }
}