harper_core/
lib.rs

1#![doc = include_str!("../README.md")]
2#![allow(dead_code)]
3
4mod char_ext;
5mod char_string;
6mod currency;
7pub mod dict_word_metadata;
8pub mod dict_word_metadata_orthography;
9mod document;
10mod edit_distance;
11pub mod expr;
12mod fat_token;
13mod ignored_lints;
14pub mod language_detection;
15mod lexing;
16pub mod linting;
17mod mask;
18mod number;
19pub mod parsers;
20pub mod patterns;
21mod punctuation;
22mod render_markdown;
23mod span;
24pub mod spell;
25mod sync;
26mod title_case;
27mod token;
28mod token_kind;
29mod token_string_ext;
30mod vec_ext;
31
32use render_markdown::render_markdown;
33use std::collections::{BTreeMap, VecDeque};
34
35pub use char_string::{CharString, CharStringExt};
36pub use currency::Currency;
37pub use dict_word_metadata::{
38    AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DictWordMetadata, NounData,
39    PronounData, VerbData, VerbForm,
40};
41pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
42pub use document::Document;
43pub use fat_token::{FatStringToken, FatToken};
44pub use ignored_lints::{IgnoredLints, LintContext};
45use linting::Lint;
46pub use mask::{Mask, Masker};
47pub use number::{Number, OrdinalSuffix};
48pub use punctuation::{Punctuation, Quote};
49pub use span::Span;
50pub use sync::{LSend, Lrc};
51pub use title_case::{make_title_case, make_title_case_str};
52pub use token::Token;
53pub use token_kind::TokenKind;
54pub use token_string_ext::TokenStringExt;
55pub use vec_ext::VecExt;
56
57/// Return harper-core version
58pub fn core_version() -> &'static str {
59    env!("CARGO_PKG_VERSION")
60}
61
62/// A utility function that removes overlapping lints in a vector,
63/// keeping the more important ones.
64///
65/// Note: this function will change the ordering of the lints.
66pub fn remove_overlaps(lints: &mut Vec<Lint>) {
67    if lints.len() < 2 {
68        return;
69    }
70
71    let mut remove_indices = VecDeque::new();
72    lints.sort_by_key(|l| (l.span.start, !0 - l.span.end));
73
74    let mut cur = 0;
75
76    for (i, lint) in lints.iter().enumerate() {
77        if lint.span.start < cur {
78            remove_indices.push_back(i);
79            continue;
80        }
81        cur = lint.span.end;
82    }
83
84    lints.remove_indices(remove_indices);
85}
86
87/// Remove overlapping lints from a map keyed by rule name, similar to [`remove_overlaps`].
88///
89/// The map is treated as if all contained lints were in a single flat collection, ensuring the
90/// same lint would be kept regardless of whether it originated from `lint` or `organized_lints`.
91pub fn remove_overlaps_map<K: Ord>(lint_map: &mut BTreeMap<K, Vec<Lint>>) {
92    let total: usize = lint_map.values().map(Vec::len).sum();
93    if total < 2 {
94        return;
95    }
96
97    struct IndexedSpan {
98        rule_idx: usize,
99        lint_idx: usize,
100        start: usize,
101        end: usize,
102    }
103
104    let mut removal_flags: Vec<Vec<bool>> = lint_map
105        .values()
106        .map(|lints| vec![false; lints.len()])
107        .collect();
108
109    let mut spans = Vec::with_capacity(total);
110    for (rule_idx, (_, lints)) in lint_map.iter().enumerate() {
111        for (lint_idx, lint) in lints.iter().enumerate() {
112            spans.push(IndexedSpan {
113                rule_idx,
114                lint_idx,
115                start: lint.span.start,
116                end: lint.span.end,
117            });
118        }
119    }
120
121    spans.sort_by_key(|span| (span.start, usize::MAX - span.end));
122
123    let mut cur = 0;
124    for span in spans {
125        if span.start < cur {
126            removal_flags[span.rule_idx][span.lint_idx] = true;
127        } else {
128            cur = span.end;
129        }
130    }
131
132    for (rule_idx, (_, lints)) in lint_map.iter_mut().enumerate() {
133        if removal_flags[rule_idx].iter().all(|flag| !*flag) {
134            continue;
135        }
136
137        let mut idx = 0;
138        lints.retain(|_| {
139            let remove = removal_flags[rule_idx][idx];
140            idx += 1;
141            !remove
142        });
143    }
144}
145
146#[cfg(test)]
147mod tests {
148    use crate::spell::FstDictionary;
149    use crate::{
150        Dialect, Document,
151        linting::{LintGroup, Linter},
152        remove_overlaps,
153    };
154
155    #[test]
156    fn keeps_space_lint() {
157        let doc = Document::new_plain_english_curated("Ths  tet");
158
159        let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);
160
161        let mut lints = linter.lint(&doc);
162
163        dbg!(&lints);
164        remove_overlaps(&mut lints);
165        dbg!(&lints);
166
167        assert_eq!(lints.len(), 3);
168    }
169}