harper_core/
lib.rs

1#![doc = include_str!("../README.md")]
2#![allow(dead_code)]
3
4mod case;
5mod char_ext;
6mod char_string;
7mod currency;
8mod dict_word_metadata;
9mod dict_word_metadata_orthography;
10mod document;
11mod edit_distance;
12pub mod expr;
13mod fat_token;
14mod ignored_lints;
15mod irregular_nouns;
16mod irregular_verbs;
17pub mod language_detection;
18mod lexing;
19pub mod linting;
20mod mask;
21mod number;
22pub mod parsers;
23pub mod patterns;
24mod punctuation;
25mod render_markdown;
26mod span;
27pub mod spell;
28mod sync;
29mod thesaurus_helper;
30mod title_case;
31mod token;
32mod token_kind;
33mod token_string_ext;
34mod vec_ext;
35pub mod weir;
36pub mod weirpack;
37
38use render_markdown::render_markdown;
39use std::collections::{BTreeMap, VecDeque};
40
41pub use case::{Case, CaseIterExt};
42pub use char_string::{CharString, CharStringExt};
43pub use currency::Currency;
44pub use dict_word_metadata::{
45    AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DialectFlags, DictWordMetadata,
46    NounData, PronounData, VerbData, VerbForm, VerbFormFlags,
47};
48pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
49pub use document::Document;
50pub use fat_token::{FatStringToken, FatToken};
51pub use ignored_lints::{IgnoredLints, LintContext};
52pub use irregular_nouns::IrregularNouns;
53pub use irregular_verbs::IrregularVerbs;
54use linting::Lint;
55pub use mask::{Mask, Masker};
56pub use number::{Number, OrdinalSuffix};
57pub use punctuation::{Punctuation, Quote};
58pub use span::Span;
59pub use sync::{LSend, Lrc};
60pub use title_case::{make_title_case, make_title_case_str};
61pub use token::Token;
62pub use token_kind::TokenKind;
63pub use token_string_ext::TokenStringExt;
64pub use vec_ext::VecExt;
65
66/// Return `harper-core` version
67pub fn core_version() -> &'static str {
68    env!("CARGO_PKG_VERSION")
69}
70
71/// A utility function that removes overlapping lints in a vector,
72/// keeping the more important ones.
73///
74/// Note: this function will change the ordering of the lints.
75pub fn remove_overlaps(lints: &mut Vec<Lint>) {
76    if lints.len() < 2 {
77        return;
78    }
79
80    let mut remove_indices = VecDeque::new();
81    lints.sort_by_key(|l| (l.span.start, !0 - l.span.end));
82
83    let mut cur = 0;
84
85    for (i, lint) in lints.iter().enumerate() {
86        if lint.span.start < cur {
87            remove_indices.push_back(i);
88            continue;
89        }
90        cur = lint.span.end;
91    }
92
93    lints.remove_indices(remove_indices);
94}
95
96/// Remove overlapping lints from a map keyed by rule name, similar to [`remove_overlaps`].
97///
98/// The map is treated as if all contained lints were in a single flat collection, ensuring the
99/// same lint would be kept regardless of whether it originated from `lint` or `organized_lints`.
100pub fn remove_overlaps_map<K: Ord>(lint_map: &mut BTreeMap<K, Vec<Lint>>) {
101    let total: usize = lint_map.values().map(Vec::len).sum();
102    if total < 2 {
103        return;
104    }
105
106    struct IndexedSpan {
107        rule_idx: usize,
108        lint_idx: usize,
109        start: usize,
110        end: usize,
111    }
112
113    let mut removal_flags: Vec<Vec<bool>> = lint_map
114        .values()
115        .map(|lints| vec![false; lints.len()])
116        .collect();
117
118    let mut spans = Vec::with_capacity(total);
119    for (rule_idx, (_, lints)) in lint_map.iter().enumerate() {
120        for (lint_idx, lint) in lints.iter().enumerate() {
121            spans.push(IndexedSpan {
122                rule_idx,
123                lint_idx,
124                start: lint.span.start,
125                end: lint.span.end,
126            });
127        }
128    }
129
130    spans.sort_by_key(|span| (span.start, usize::MAX - span.end));
131
132    let mut cur = 0;
133    for span in spans {
134        if span.start < cur {
135            removal_flags[span.rule_idx][span.lint_idx] = true;
136        } else {
137            cur = span.end;
138        }
139    }
140
141    for (rule_idx, (_, lints)) in lint_map.iter_mut().enumerate() {
142        if removal_flags[rule_idx].iter().all(|flag| !*flag) {
143            continue;
144        }
145
146        let mut idx = 0;
147        lints.retain(|_| {
148            let remove = removal_flags[rule_idx][idx];
149            idx += 1;
150            !remove
151        });
152    }
153}
154
#[cfg(test)]
mod tests {
    use crate::spell::FstDictionary;
    use crate::{
        Dialect, Document,
        linting::{LintGroup, Linter},
        remove_overlaps,
    };

    /// Lints a short text with the curated American-English lint group and checks that
    /// exactly three lints remain after overlap removal.
    ///
    /// NOTE(review): going by the test name, the three are presumably two spelling lints
    /// plus the lint for the doubled space; confirm against the curated rule set.
    #[test]
    fn keeps_space_lint() {
        let doc = Document::new_plain_english_curated("Ths  tet");

        let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

        let mut lints = linter.lint(&doc);
        remove_overlaps(&mut lints);

        // Print the lints only on failure, so passing runs stay quiet.
        assert_eq!(
            lints.len(),
            3,
            "unexpected lints after overlap removal: {:#?}",
            lints
        );
    }
}
harper_core/lib.rs

harper_core/
lib.rs