1#![doc = include_str!("../README.md")]
2#![allow(dead_code)]
3
4mod case;
5mod char_ext;
6mod char_string;
7mod currency;
8mod dict_word_metadata;
9mod dict_word_metadata_orthography;
10mod document;
11mod edit_distance;
12pub mod expr;
13mod fat_token;
14mod ignored_lints;
15mod indefinite_article;
16mod irregular_nouns;
17mod irregular_verbs;
18pub mod language_detection;
19mod lexing;
20pub mod linting;
21mod mask;
22mod number;
23mod offsets;
24pub mod parsers;
25pub mod patterns;
26mod punctuation;
27mod regular_nouns;
28mod render_markdown;
29mod span;
30pub mod spell;
31mod sync;
32mod thesaurus_helper;
33mod title_case;
34mod token;
35mod token_kind;
36mod token_string_ext;
37mod vec_ext;
38pub mod weir;
39pub mod weirpack;
40
41use render_markdown::render_markdown;
42use std::collections::{BTreeMap, VecDeque};
43
44pub use case::{Case, CaseIterExt};
45pub use char_string::{CharString, CharStringExt};
46pub use currency::Currency;
47pub use dict_word_metadata::{
48 AdverbData, ConjunctionData, Degree, DeterminerData, Dialect, DialectFlags, DictWordMetadata,
49 NounData, PronounData, VerbData, VerbForm, VerbFormFlags,
50};
51pub use dict_word_metadata_orthography::{OrthFlags, Orthography};
52pub use document::Document;
53pub use fat_token::{FatStringToken, FatToken};
54pub use ignored_lints::{IgnoredLints, LintContext};
55pub use indefinite_article::{InitialSound, starts_with_vowel};
56pub use irregular_nouns::IrregularNouns;
57pub use irregular_verbs::IrregularVerbs;
58use linting::Lint;
59pub use mask::{Mask, Masker, RegexMasker};
60pub use number::{Number, OrdinalSuffix};
61pub use punctuation::{Punctuation, Quote};
62pub use regular_nouns::{get_plurals, get_singulars};
63pub use span::Span;
64pub use sync::{LSend, Lrc};
65pub use title_case::{make_title_case, make_title_case_str};
66pub use token::Token;
67pub use token_kind::TokenKind;
68pub use token_string_ext::TokenStringExt;
69pub use vec_ext::VecExt;
70
71pub fn core_version() -> &'static str {
73 env!("CARGO_PKG_VERSION")
74}
75
76pub fn remove_overlaps(lints: &mut Vec<Lint>) {
81 if lints.len() < 2 {
82 return;
83 }
84
85 let mut remove_indices = VecDeque::new();
86 lints.sort_by_key(|l| l.priority);
87 lints.sort_by_key(|l| (l.span.start, !0 - l.span.end));
88
89 let mut cur = 0;
90
91 for (i, lint) in lints.iter().enumerate() {
92 if lint.span.start < cur {
93 remove_indices.push_back(i);
94 continue;
95 }
96 cur = lint.span.end;
97 }
98
99 lints.remove_indices(remove_indices);
100}
101
102pub fn remove_lints_overlapping_expr<E: expr::Expr + ?Sized>(
108 expr: &E,
109 document: &Document,
110 lints: &mut Vec<Lint>,
111) {
112 if lints.is_empty() {
113 return;
114 }
115
116 let tokens = document.get_tokens();
117 let source = document.get_source();
118 let matched_spans: Vec<Span<char>> = (0..tokens.len())
119 .filter_map(|cursor| {
120 let token_span = expr.run(cursor, tokens, source)?;
121
122 if token_span.is_empty() {
123 None
124 } else {
125 Some(token_span.to_char_span(tokens))
126 }
127 })
128 .collect();
129
130 if matched_spans.is_empty() {
131 return;
132 }
133
134 lints.retain(|lint| {
135 !matched_spans
136 .iter()
137 .any(|matched_span| lint.span.overlaps_with(*matched_span))
138 });
139}
140
141pub fn remove_overlaps_map<K: Ord>(lint_map: &mut BTreeMap<K, Vec<Lint>>) {
146 let total: usize = lint_map.values().map(Vec::len).sum();
147 if total < 2 {
148 return;
149 }
150
151 struct IndexedSpan {
152 rule_idx: usize,
153 lint_idx: usize,
154 priority: u8,
155 start: usize,
156 end: usize,
157 }
158
159 let mut removal_flags: Vec<Vec<bool>> = lint_map
160 .values()
161 .map(|lints| vec![false; lints.len()])
162 .collect();
163
164 let mut spans = Vec::with_capacity(total);
165 for (rule_idx, (_, lints)) in lint_map.iter().enumerate() {
166 for (lint_idx, lint) in lints.iter().enumerate() {
167 spans.push(IndexedSpan {
168 priority: lint.priority,
169 rule_idx,
170 lint_idx,
171 start: lint.span.start,
172 end: lint.span.end,
173 });
174 }
175 }
176
177 spans.sort_by_key(|span| span.priority);
178 spans.sort_by_key(|span| (span.start, usize::MAX - span.end));
179
180 let mut cur = 0;
181 for span in spans {
182 if span.start < cur {
183 removal_flags[span.rule_idx][span.lint_idx] = true;
184 } else {
185 cur = span.end;
186 }
187 }
188
189 for (rule_idx, (_, lints)) in lint_map.iter_mut().enumerate() {
190 if removal_flags[rule_idx].iter().all(|flag| !*flag) {
191 continue;
192 }
193
194 let mut idx = 0;
195 lints.retain(|_| {
196 let remove = removal_flags[rule_idx][idx];
197 idx += 1;
198 !remove
199 });
200 }
201}
202
#[cfg(test)]
mod tests {
    use std::hash::DefaultHasher;
    use std::hash::{Hash, Hasher};

    use itertools::Itertools;
    use quickcheck_macros::quickcheck;

    use crate::linting::Lint;
    use crate::remove_overlaps_map;
    use crate::spell::FstDictionary;
    use crate::{
        Dialect, Document, Span,
        expr::{AnchorStart, SequenceExpr},
        linting::{LintGroup, Linter},
        remove_lints_overlapping_expr, remove_overlaps,
    };

    /// Overlap removal on the curated lints for "Ths tet" must leave exactly
    /// three lints.
    #[test]
    fn keeps_space_lint() {
        let doc = Document::new_plain_english_curated("Ths tet");

        let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

        let mut lints = linter.lint(&doc);
        // Kept only so a failure can show what went in as well as what came out.
        let before = lints.clone();

        remove_overlaps(&mut lints);

        assert_eq!(
            lints.len(),
            3,
            "unexpected lints after overlap removal\nbefore: {before:#?}\nafter: {lints:#?}"
        );
    }

    /// A lint covering the matched word is removed.
    #[test]
    fn remove_lints_overlapping_expr_removes_overlapping_lints() {
        let doc = Document::new_plain_english_curated("keep bad keep");
        let mut lints = vec![Lint {
            span: Span::new(5, 8),
            ..Default::default()
        }];

        remove_lints_overlapping_expr(&SequenceExpr::aco("bad"), &doc, &mut lints);

        assert!(lints.is_empty());
    }

    /// A lint that lies entirely outside the matched word is kept.
    #[test]
    fn remove_lints_overlapping_expr_keeps_non_overlapping_lints() {
        let doc = Document::new_plain_english_curated("keep bad keep");
        let mut lints = vec![Lint {
            span: Span::new(0, 4),
            ..Default::default()
        }];

        remove_lints_overlapping_expr(&SequenceExpr::aco("bad"), &doc, &mut lints);

        assert_eq!(lints.len(), 1);
    }

    /// An expression that only produces zero-width matches removes nothing.
    #[test]
    fn remove_lints_overlapping_expr_ignores_zero_width_matches() {
        let doc = Document::new_plain_english_curated("bad");
        let mut lints = vec![Lint {
            span: Span::new(0, 3),
            ..Default::default()
        }];

        remove_lints_overlapping_expr(&AnchorStart, &doc, &mut lints);

        assert_eq!(lints.len(), 1);
    }

    /// `remove_overlaps_map` and `remove_overlaps` must keep the same set of
    /// lints for arbitrary input text.
    #[quickcheck]
    fn overlap_removals_have_equivalent_behavior(s: String) {
        /// Hash a single lint with the standard library's default hasher.
        fn hash_lint(lint: &Lint) -> u64 {
            let mut hasher = DefaultHasher::new();
            lint.hash(&mut hasher);
            hasher.finish()
        }

        /// Sorted hashes of `lints`, so two collections can be compared
        /// without regard to their ordering.
        fn sorted_hashes(lints: &[Lint]) -> Vec<u64> {
            lints.iter().map(hash_lint).sorted().collect()
        }

        let doc = Document::new_plain_english_curated(&s);
        let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);

        let mut lint_map = linter.organized_lints(&doc);
        let mut lint_flat: Vec<_> = lint_map.values().flatten().cloned().collect();

        remove_overlaps_map(&mut lint_map);
        remove_overlaps(&mut lint_flat);

        let post_removal_flat: Vec<_> = lint_map.values().flatten().cloned().collect();

        assert_eq!(sorted_hashes(&post_removal_flat), sorted_hashes(&lint_flat));
    }
}