harper_core/
lib.rs

1#![doc = include_str!("../README.md")]
2#![allow(dead_code)]
3
4mod case;
5mod char_ext;
6mod char_string;
7mod currency;
8mod dict_word_metadata;
9mod dict_word_metadata_orthography;
10mod document;
11mod edit_distance;
12pub mod expr;
13mod fat_token;
14mod ignored_lints;
15mod irregular_nouns;
16mod irregular_verbs;
17pub mod language_detection;
18mod lexing;
19pub mod linting;
20mod mask;
21mod number;
22pub mod parsers;
23pub mod patterns;
24mod punctuation;
25mod render_markdown;
26mod span;
27pub mod spell;
28mod sync;
29mod thesaurus_helper;
30mod title_case;
31mod token;
32mod token_kind;
33mod token_string_ext;
34mod vec_ext;
35pub mod weir;
36
37use render_markdown::render_markdown;
38use std::collections::{BTreeMap, VecDeque};
39
40pub use case::{Case, CaseIterExt};
41pub use char_string::{CharString, CharStringExt};
42pub use currency::Currency;
43pub use dict_word_metadata::{
44    AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DialectFlags, DictWordMetadata,
45    NounData, PronounData, VerbData, VerbForm, VerbFormFlags,
46};
47pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
48pub use document::Document;
49pub use fat_token::{FatStringToken, FatToken};
50pub use ignored_lints::{IgnoredLints, LintContext};
51pub use irregular_nouns::IrregularNouns;
52pub use irregular_verbs::IrregularVerbs;
53use linting::Lint;
54pub use mask::{Mask, Masker};
55pub use number::{Number, OrdinalSuffix};
56pub use punctuation::{Punctuation, Quote};
57pub use span::Span;
58pub use sync::{LSend, Lrc};
59pub use title_case::{make_title_case, make_title_case_str};
60pub use token::Token;
61pub use token_kind::TokenKind;
62pub use token_string_ext::TokenStringExt;
63pub use vec_ext::VecExt;
64
65/// Return `harper-core` version
66pub fn core_version() -> &'static str {
67    env!("CARGO_PKG_VERSION")
68}
69
70/// A utility function that removes overlapping lints in a vector,
71/// keeping the more important ones.
72///
73/// Note: this function will change the ordering of the lints.
74pub fn remove_overlaps(lints: &mut Vec<Lint>) {
75    if lints.len() < 2 {
76        return;
77    }
78
79    let mut remove_indices = VecDeque::new();
80    lints.sort_by_key(|l| (l.span.start, !0 - l.span.end));
81
82    let mut cur = 0;
83
84    for (i, lint) in lints.iter().enumerate() {
85        if lint.span.start < cur {
86            remove_indices.push_back(i);
87            continue;
88        }
89        cur = lint.span.end;
90    }
91
92    lints.remove_indices(remove_indices);
93}
94
95/// Remove overlapping lints from a map keyed by rule name, similar to [`remove_overlaps`].
96///
97/// The map is treated as if all contained lints were in a single flat collection, ensuring the
98/// same lint would be kept regardless of whether it originated from `lint` or `organized_lints`.
99pub fn remove_overlaps_map<K: Ord>(lint_map: &mut BTreeMap<K, Vec<Lint>>) {
100    let total: usize = lint_map.values().map(Vec::len).sum();
101    if total < 2 {
102        return;
103    }
104
105    struct IndexedSpan {
106        rule_idx: usize,
107        lint_idx: usize,
108        start: usize,
109        end: usize,
110    }
111
112    let mut removal_flags: Vec<Vec<bool>> = lint_map
113        .values()
114        .map(|lints| vec![false; lints.len()])
115        .collect();
116
117    let mut spans = Vec::with_capacity(total);
118    for (rule_idx, (_, lints)) in lint_map.iter().enumerate() {
119        for (lint_idx, lint) in lints.iter().enumerate() {
120            spans.push(IndexedSpan {
121                rule_idx,
122                lint_idx,
123                start: lint.span.start,
124                end: lint.span.end,
125            });
126        }
127    }
128
129    spans.sort_by_key(|span| (span.start, usize::MAX - span.end));
130
131    let mut cur = 0;
132    for span in spans {
133        if span.start < cur {
134            removal_flags[span.rule_idx][span.lint_idx] = true;
135        } else {
136            cur = span.end;
137        }
138    }
139
140    for (rule_idx, (_, lints)) in lint_map.iter_mut().enumerate() {
141        if removal_flags[rule_idx].iter().all(|flag| !*flag) {
142            continue;
143        }
144
145        let mut idx = 0;
146        lints.retain(|_| {
147            let remove = removal_flags[rule_idx][idx];
148            idx += 1;
149            !remove
150        });
151    }
152}
153
#[cfg(test)]
mod tests {
    use crate::spell::FstDictionary;
    use crate::{
        Dialect, Document,
        linting::{LintGroup, Linter},
        remove_overlaps,
    };

    /// Lints a short text containing a doubled space and checks that exactly three lints are
    /// left once overlaps have been removed.
    ///
    /// NOTE(review): presumably the three survivors are the two misspellings plus the
    /// double-space lint the test is named after; the assertion itself only checks the count.
    #[test]
    fn keeps_space_lint() {
        let doc = Document::new_plain_english_curated("Ths  tet");

        let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

        let mut lints = linter.lint(&doc);

        // Captured up front so a failure can show what overlap removal started from, without
        // printing debug output on every passing run.
        let before = format!("{:#?}", &lints);

        remove_overlaps(&mut lints);

        assert_eq!(
            lints.len(),
            3,
            "unexpected number of lints after overlap removal\nbefore: {}\nafter: {:#?}",
            before,
            &lints
        );
    }
}
177}