Skip to main content

harper_core/
lib.rs

1#![doc = include_str!("../README.md")]
2#![allow(dead_code)]
3
4mod case;
5mod char_ext;
6mod char_string;
7mod currency;
8mod dict_word_metadata;
9mod dict_word_metadata_orthography;
10mod document;
11mod edit_distance;
12pub mod expr;
13mod fat_token;
14mod ignored_lints;
15mod indefinite_article;
16mod irregular_nouns;
17mod irregular_verbs;
18pub mod language_detection;
19mod lexing;
20pub mod linting;
21mod mask;
22mod number;
23mod offsets;
24pub mod parsers;
25pub mod patterns;
26mod punctuation;
27mod render_markdown;
28mod span;
29pub mod spell;
30mod sync;
31mod thesaurus_helper;
32mod title_case;
33mod token;
34mod token_kind;
35mod token_string_ext;
36mod vec_ext;
37pub mod weir;
38pub mod weirpack;
39
40use render_markdown::render_markdown;
41use std::collections::{BTreeMap, VecDeque};
42
43pub use case::{Case, CaseIterExt};
44pub use char_string::{CharString, CharStringExt};
45pub use currency::Currency;
46pub use dict_word_metadata::{
47    AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DialectFlags, DictWordMetadata,
48    NounData, PronounData, VerbData, VerbForm, VerbFormFlags,
49};
50pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
51pub use document::Document;
52pub use fat_token::{FatStringToken, FatToken};
53pub use ignored_lints::{IgnoredLints, LintContext};
54pub use indefinite_article::{InitialSound, starts_with_vowel};
55pub use irregular_nouns::IrregularNouns;
56pub use irregular_verbs::IrregularVerbs;
57use linting::Lint;
58pub use mask::{Mask, Masker, RegexMasker};
59pub use number::{Number, OrdinalSuffix};
60pub use punctuation::{Punctuation, Quote};
61pub use span::Span;
62pub use sync::{LSend, Lrc};
63pub use title_case::{make_title_case, make_title_case_str};
64pub use token::Token;
65pub use token_kind::TokenKind;
66pub use token_string_ext::TokenStringExt;
67pub use vec_ext::VecExt;
68
69/// Return `harper-core` version
70pub fn core_version() -> &'static str {
71    env!("CARGO_PKG_VERSION")
72}
73
74/// A utility function that removes overlapping lints in a vector,
75/// keeping the more important ones.
76///
77/// Note: this function will change the ordering of the lints.
78pub fn remove_overlaps(lints: &mut Vec<Lint>) {
79    if lints.len() < 2 {
80        return;
81    }
82
83    let mut remove_indices = VecDeque::new();
84    lints.sort_by_key(|l| l.priority);
85    lints.sort_by_key(|l| (l.span.start, !0 - l.span.end));
86
87    let mut cur = 0;
88
89    for (i, lint) in lints.iter().enumerate() {
90        if lint.span.start < cur {
91            remove_indices.push_back(i);
92            continue;
93        }
94        cur = lint.span.end;
95    }
96
97    lints.remove_indices(remove_indices);
98}
99
100/// Remove overlapping lints from a map keyed by rule name, similar to [`remove_overlaps`].
101///
102/// The map is treated as if all contained lints were in a single flat collection, ensuring the
103/// same lint would be kept regardless of whether it originated from `lint` or `organized_lints`.
104pub fn remove_overlaps_map<K: Ord>(lint_map: &mut BTreeMap<K, Vec<Lint>>) {
105    let total: usize = lint_map.values().map(Vec::len).sum();
106    if total < 2 {
107        return;
108    }
109
110    struct IndexedSpan {
111        rule_idx: usize,
112        lint_idx: usize,
113        priority: u8,
114        start: usize,
115        end: usize,
116    }
117
118    let mut removal_flags: Vec<Vec<bool>> = lint_map
119        .values()
120        .map(|lints| vec![false; lints.len()])
121        .collect();
122
123    let mut spans = Vec::with_capacity(total);
124    for (rule_idx, (_, lints)) in lint_map.iter().enumerate() {
125        for (lint_idx, lint) in lints.iter().enumerate() {
126            spans.push(IndexedSpan {
127                priority: lint.priority,
128                rule_idx,
129                lint_idx,
130                start: lint.span.start,
131                end: lint.span.end,
132            });
133        }
134    }
135
136    spans.sort_by_key(|span| span.priority);
137    spans.sort_by_key(|span| (span.start, usize::MAX - span.end));
138
139    let mut cur = 0;
140    for span in spans {
141        if span.start < cur {
142            removal_flags[span.rule_idx][span.lint_idx] = true;
143        } else {
144            cur = span.end;
145        }
146    }
147
148    for (rule_idx, (_, lints)) in lint_map.iter_mut().enumerate() {
149        if removal_flags[rule_idx].iter().all(|flag| !*flag) {
150            continue;
151        }
152
153        let mut idx = 0;
154        lints.retain(|_| {
155            let remove = removal_flags[rule_idx][idx];
156            idx += 1;
157            !remove
158        });
159    }
160}
161
#[cfg(test)]
mod tests {
    use std::hash::{DefaultHasher, Hash, Hasher};

    use itertools::Itertools;
    use quickcheck_macros::quickcheck;

    use crate::linting::{Lint, LintGroup, Linter};
    use crate::spell::FstDictionary;
    use crate::{Dialect, Document, remove_overlaps, remove_overlaps_map};

    /// Removing overlaps from the lints produced for `"Ths  tet"` must leave exactly three.
    #[test]
    fn keeps_space_lint() {
        let document = Document::new_plain_english_curated("Ths  tet");
        let mut lint_group = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

        let mut found = lint_group.lint(&document);

        // Print the lints before and after so a failure is easy to diagnose.
        dbg!(&found);
        remove_overlaps(&mut found);
        dbg!(&found);

        assert_eq!(found.len(), 3);
    }

    /// `remove_overlaps_map` and `remove_overlaps` must keep the same set of lints when given
    /// the same input, regardless of whether it is organized as a map or a flat vector.
    #[quickcheck]
    fn overlap_removals_have_equivalent_behavior(s: String) {
        /// Hashes every lint and sorts the hashes so that ordering is ignored when comparing.
        fn sorted_hashes(lints: &[Lint]) -> Vec<u64> {
            lints
                .iter()
                .map(|lint| {
                    let mut hasher = DefaultHasher::new();
                    lint.hash(&mut hasher);
                    hasher.finish()
                })
                .sorted()
                .collect()
        }

        let document = Document::new_plain_english_curated(&s);
        let mut lint_group = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

        let mut by_rule = lint_group.organized_lints(&document);
        let mut flat: Vec<_> = by_rule.values().flatten().cloned().collect();

        remove_overlaps_map(&mut by_rule);
        remove_overlaps(&mut flat);

        let flattened_after: Vec<_> = by_rule.values().flatten().cloned().collect();

        assert_eq!(sorted_hashes(&flattened_after), sorted_hashes(&flat));
    }
}
220}