harper_core/
lib.rs

1#![doc = include_str!("../README.md")]
2#![allow(dead_code)]
3
4mod char_ext;
5mod char_string;
6mod currency;
7pub mod dict_word_metadata;
8pub mod dict_word_metadata_orthography;
9mod document;
10mod edit_distance;
11pub mod expr;
12mod fat_token;
13mod ignored_lints;
14pub mod language_detection;
15mod lexing;
16pub mod linting;
17mod mask;
18mod number;
19pub mod parsers;
20pub mod patterns;
21mod punctuation;
22mod render_markdown;
23mod span;
24pub mod spell;
25mod sync;
26mod title_case;
27mod token;
28mod token_kind;
29mod token_string_ext;
30mod vec_ext;
31
32use render_markdown::render_markdown;
33use std::collections::{BTreeMap, VecDeque};
34
35pub use char_string::{CharString, CharStringExt};
36pub use currency::Currency;
37pub use dict_word_metadata::{
38    AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DictWordMetadata, NounData,
39    PronounData, VerbData, VerbForm,
40};
41pub use document::Document;
42pub use fat_token::{FatStringToken, FatToken};
43pub use ignored_lints::{IgnoredLints, LintContext};
44use linting::Lint;
45pub use mask::{Mask, Masker};
46pub use number::{Number, OrdinalSuffix};
47pub use punctuation::{Punctuation, Quote};
48pub use span::Span;
49pub use sync::{LSend, Lrc};
50pub use title_case::{make_title_case, make_title_case_str};
51pub use token::Token;
52pub use token_kind::TokenKind;
53pub use token_string_ext::TokenStringExt;
54pub use vec_ext::VecExt;
55
56/// Return harper-core version
57pub fn core_version() -> &'static str {
58    env!("CARGO_PKG_VERSION")
59}
60
61/// A utility function that removes overlapping lints in a vector,
62/// keeping the more important ones.
63///
64/// Note: this function will change the ordering of the lints.
65pub fn remove_overlaps(lints: &mut Vec<Lint>) {
66    if lints.len() < 2 {
67        return;
68    }
69
70    let mut remove_indices = VecDeque::new();
71    lints.sort_by_key(|l| (l.span.start, !0 - l.span.end));
72
73    let mut cur = 0;
74
75    for (i, lint) in lints.iter().enumerate() {
76        if lint.span.start < cur {
77            remove_indices.push_back(i);
78            continue;
79        }
80        cur = lint.span.end;
81    }
82
83    lints.remove_indices(remove_indices);
84}
85
86/// Remove overlapping lints from a map keyed by rule name, similar to [`remove_overlaps`].
87///
88/// The map is treated as if all contained lints were in a single flat collection, ensuring the
89/// same lint would be kept regardless of whether it originated from `lint` or `organized_lints`.
90pub fn remove_overlaps_map<K: Ord>(lint_map: &mut BTreeMap<K, Vec<Lint>>) {
91    let total: usize = lint_map.values().map(Vec::len).sum();
92    if total < 2 {
93        return;
94    }
95
96    struct IndexedSpan {
97        rule_idx: usize,
98        lint_idx: usize,
99        start: usize,
100        end: usize,
101    }
102
103    let mut removal_flags: Vec<Vec<bool>> = lint_map
104        .values()
105        .map(|lints| vec![false; lints.len()])
106        .collect();
107
108    let mut spans = Vec::with_capacity(total);
109    for (rule_idx, (_, lints)) in lint_map.iter().enumerate() {
110        for (lint_idx, lint) in lints.iter().enumerate() {
111            spans.push(IndexedSpan {
112                rule_idx,
113                lint_idx,
114                start: lint.span.start,
115                end: lint.span.end,
116            });
117        }
118    }
119
120    spans.sort_by_key(|span| (span.start, usize::MAX - span.end));
121
122    let mut cur = 0;
123    for span in spans {
124        if span.start < cur {
125            removal_flags[span.rule_idx][span.lint_idx] = true;
126        } else {
127            cur = span.end;
128        }
129    }
130
131    for (rule_idx, (_, lints)) in lint_map.iter_mut().enumerate() {
132        if removal_flags[rule_idx].iter().all(|flag| !*flag) {
133            continue;
134        }
135
136        let mut idx = 0;
137        lints.retain(|_| {
138            let remove = removal_flags[rule_idx][idx];
139            idx += 1;
140            !remove
141        });
142    }
143}
144
#[cfg(test)]
mod tests {
    use crate::linting::{LintGroup, Linter};
    use crate::spell::FstDictionary;
    use crate::{Dialect, Document, remove_overlaps};

    /// Lints a short text containing a doubled space and checks that exactly three lints are
    /// left once overlaps have been removed.
    ///
    /// NOTE(review): presumably these are two spelling lints plus the doubled-space lint the
    /// test is named after; the assertion itself only checks the count.
    #[test]
    fn keeps_space_lint() {
        let doc = Document::new_plain_english_curated("Ths  tet");

        let dictionary = FstDictionary::curated();
        let mut linter = LintGroup::new_curated(dictionary, Dialect::American);

        let mut lints = linter.lint(&doc);

        // Print the lints before and after so a failing run shows what was dropped.
        dbg!(&lints);
        remove_overlaps(&mut lints);
        dbg!(&lints);

        assert_eq!(lints.len(), 3);
    }
}