harper_core/
lib.rs

1#![doc = include_str!("../README.md")]
2#![allow(dead_code)]
3
4mod char_ext;
5mod char_string;
6mod currency;
7mod dict_word_metadata;
8mod dict_word_metadata_orthography;
9mod document;
10mod edit_distance;
11pub mod expr;
12mod fat_token;
13mod ignored_lints;
14mod irregular_nouns;
15mod irregular_verbs;
16pub mod language_detection;
17mod lexing;
18pub mod linting;
19mod mask;
20mod number;
21pub mod parsers;
22pub mod patterns;
23mod punctuation;
24mod render_markdown;
25mod span;
26pub mod spell;
27mod sync;
28mod title_case;
29mod token;
30mod token_kind;
31mod token_string_ext;
32mod vec_ext;
33
34use render_markdown::render_markdown;
35use std::collections::{BTreeMap, VecDeque};
36
37pub use char_string::{CharString, CharStringExt};
38pub use currency::Currency;
39pub use dict_word_metadata::{
40    AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DialectFlags, DictWordMetadata,
41    NounData, PronounData, VerbData, VerbForm, VerbFormFlags,
42};
43pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
44pub use document::Document;
45pub use fat_token::{FatStringToken, FatToken};
46pub use ignored_lints::{IgnoredLints, LintContext};
47pub use irregular_nouns::IrregularNouns;
48pub use irregular_verbs::IrregularVerbs;
49use linting::Lint;
50pub use mask::{Mask, Masker};
51pub use number::{Number, OrdinalSuffix};
52pub use punctuation::{Punctuation, Quote};
53pub use span::Span;
54pub use sync::{LSend, Lrc};
55pub use title_case::{make_title_case, make_title_case_str};
56pub use token::Token;
57pub use token_kind::TokenKind;
58pub use token_string_ext::TokenStringExt;
59pub use vec_ext::VecExt;
60
61/// Return `harper-core` version
62pub fn core_version() -> &'static str {
63    env!("CARGO_PKG_VERSION")
64}
65
66/// A utility function that removes overlapping lints in a vector,
67/// keeping the more important ones.
68///
69/// Note: this function will change the ordering of the lints.
70pub fn remove_overlaps(lints: &mut Vec<Lint>) {
71    if lints.len() < 2 {
72        return;
73    }
74
75    let mut remove_indices = VecDeque::new();
76    lints.sort_by_key(|l| (l.span.start, !0 - l.span.end));
77
78    let mut cur = 0;
79
80    for (i, lint) in lints.iter().enumerate() {
81        if lint.span.start < cur {
82            remove_indices.push_back(i);
83            continue;
84        }
85        cur = lint.span.end;
86    }
87
88    lints.remove_indices(remove_indices);
89}
90
91/// Remove overlapping lints from a map keyed by rule name, similar to [`remove_overlaps`].
92///
93/// The map is treated as if all contained lints were in a single flat collection, ensuring the
94/// same lint would be kept regardless of whether it originated from `lint` or `organized_lints`.
95pub fn remove_overlaps_map<K: Ord>(lint_map: &mut BTreeMap<K, Vec<Lint>>) {
96    let total: usize = lint_map.values().map(Vec::len).sum();
97    if total < 2 {
98        return;
99    }
100
101    struct IndexedSpan {
102        rule_idx: usize,
103        lint_idx: usize,
104        start: usize,
105        end: usize,
106    }
107
108    let mut removal_flags: Vec<Vec<bool>> = lint_map
109        .values()
110        .map(|lints| vec![false; lints.len()])
111        .collect();
112
113    let mut spans = Vec::with_capacity(total);
114    for (rule_idx, (_, lints)) in lint_map.iter().enumerate() {
115        for (lint_idx, lint) in lints.iter().enumerate() {
116            spans.push(IndexedSpan {
117                rule_idx,
118                lint_idx,
119                start: lint.span.start,
120                end: lint.span.end,
121            });
122        }
123    }
124
125    spans.sort_by_key(|span| (span.start, usize::MAX - span.end));
126
127    let mut cur = 0;
128    for span in spans {
129        if span.start < cur {
130            removal_flags[span.rule_idx][span.lint_idx] = true;
131        } else {
132            cur = span.end;
133        }
134    }
135
136    for (rule_idx, (_, lints)) in lint_map.iter_mut().enumerate() {
137        if removal_flags[rule_idx].iter().all(|flag| !*flag) {
138            continue;
139        }
140
141        let mut idx = 0;
142        lints.retain(|_| {
143            let remove = removal_flags[rule_idx][idx];
144            idx += 1;
145            !remove
146        });
147    }
148}
149
#[cfg(test)]
mod tests {
    use crate::linting::{LintGroup, Linter};
    use crate::spell::FstDictionary;
    use crate::{Dialect, Document, remove_overlaps};

    /// Lints a short text containing a doubled space and checks that exactly three lints remain
    /// after overlap removal.
    // NOTE(review): presumably the three survivors are two spelling lints plus the spacing lint
    // the test is named after; confirm against the curated lint group.
    #[test]
    fn keeps_space_lint() {
        let document = Document::new_plain_english_curated("Ths  tet");
        let mut lint_group = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

        let mut lints = lint_group.lint(&document);
        dbg!(&lints);

        remove_overlaps(&mut lints);
        dbg!(&lints);

        assert_eq!(lints.len(), 3);
    }
}