1use std::borrow::Cow;
2
3use itertools::Itertools;
4
5use crate::char_ext::CharExt;
6use crate::linting::{Lint, LintKind, Linter, Suggestion};
7use crate::{Document, TokenStringExt};
8
/// Lint rule that flags an indefinite article (`a` / `an`) that does not agree
/// with the leading sound of the word that follows it.
///
/// The rule carries no state; it can be created with `AnA` or
/// `AnA::default()`.
#[derive(Default, Debug)]
pub struct AnA;
11
12impl Linter for AnA {
13 fn lint(&mut self, document: &Document) -> Vec<Lint> {
14 let mut lints = Vec::new();
15
16 for chunk in document.iter_chunks() {
17 for (first_idx, second_idx) in chunk.iter_word_indices().tuple_windows() {
18 if chunk[first_idx..second_idx].iter_unlintables().count() > 0
20 || chunk[first_idx + 1..second_idx]
21 .iter_word_like_indices()
22 .count()
23 > 0
24 {
25 continue;
26 }
27
28 let first = &chunk[first_idx];
29 let second = &chunk[second_idx];
30
31 let chars_first = document.get_span_content(&first.span);
32 let chars_second = document.get_span_content(&second.span);
33 let chars_second = chars_second
36 .split(|c| !c.is_alphanumeric())
37 .next()
38 .unwrap_or(chars_second);
39
40 let is_a_an = match chars_first {
41 ['a'] => Some(true),
42 ['A'] => Some(true),
43 ['a', 'n'] => Some(false),
44 ['A', 'n'] => Some(false),
45 _ => None,
46 };
47
48 let Some(a_an) = is_a_an else {
49 continue;
50 };
51
52 let should_be_a_an = !starts_with_vowel(chars_second);
53
54 if a_an != should_be_a_an {
55 let replacement = match a_an {
56 true => vec!['a', 'n'],
57 false => vec!['a'],
58 };
59
60 lints.push(Lint {
61 span: first.span,
62 lint_kind: LintKind::Miscellaneous,
63 suggestions: vec![Suggestion::replace_with_match_case(
64 replacement,
65 chars_first,
66 )],
67 message: "Incorrect indefinite article.".to_string(),
68 priority: 31,
69 })
70 }
71 }
72 }
73
74 lints
75 }
76
77 fn description(&self) -> &'static str {
78 "A rule that looks for incorrect indefinite articles. For example, `this is an mule` would be flagged as incorrect."
79 }
80}
81
/// Returns `word` in lowercase.
///
/// When `word` contains no uppercase character the input slice is handed back
/// borrowed, without allocating. Otherwise an owned, lowercased copy is built;
/// it may be longer than the input because a single `char` can lowercase to
/// several.
fn to_lower_word(word: &[char]) -> Cow<'_, [char]> {
    let has_uppercase = word.iter().any(|c| c.is_uppercase());
    if !has_uppercase {
        return Cow::Borrowed(word);
    }

    let mut lowered = Vec::with_capacity(word.len());
    for c in word {
        lowered.extend(c.to_lowercase());
    }
    Cow::Owned(lowered)
}
93
94fn starts_with_vowel(word: &[char]) -> bool {
100 let is_likely_initialism = word.iter().all(|c| !c.is_alphabetic() || c.is_uppercase());
101
102 if is_likely_initialism && !word.is_empty() && !is_likely_acronym(word) {
103 return matches!(
104 word[0],
105 'A' | 'E' | 'F' | 'H' | 'I' | 'L' | 'M' | 'N' | 'O' | 'R' | 'S' | 'X'
106 );
107 }
108
109 let word = to_lower_word(word);
110 let word = word.as_ref();
111
112 if matches!(
113 word,
114 [] | ['u', 'k', ..]
115 | ['e', 'u', 'p', 'h', ..]
116 | ['e', 'u', 'g' | 'l' | 'c', ..]
117 | ['o', 'n', 'e']
118 | ['o', 'n', 'c', 'e']
119 ) {
120 return false;
121 }
122
123 if matches!(word, |['h', 'o', 'u', 'r', ..]| ['h', 'o', 'n', ..]
124 | ['u', 'n', 'i', 'n' | 'm', ..]
125 | ['u', 'n', 'a' | 'u', ..]
126 | ['h', 'e', 'r', 'b', ..]
127 | ['u', 'r', 'b', ..]
128 | ['i', 'n', 't', ..])
129 {
130 return true;
131 }
132
133 if matches!(word, ['u', 'n' | 's', 'i' | 'a' | 'u', ..]) {
134 return false;
135 }
136
137 if matches!(word, ['u', 'n', ..]) {
138 return true;
139 }
140
141 if matches!(word, ['u', 'r', 'g', ..]) {
142 return true;
143 }
144
145 if matches!(word, ['u', 't', 't', ..]) {
146 return true;
147 }
148
149 if matches!(
150 word,
151 ['u', 't' | 'r' | 'n', ..] | ['e', 'u', 'r', ..] | ['u', 'w', ..] | ['u', 's', 'e', ..]
152 ) {
153 return false;
154 }
155
156 if matches!(word, ['o', 'n', 'e', 'a' | 'e' | 'i' | 'u', 'l' | 'd', ..]) {
157 return true;
158 }
159
160 if matches!(word, ['o', 'n', 'e', 'a' | 'e' | 'i' | 'u' | '-' | 's', ..]) {
161 return false;
162 }
163
164 if matches!(
165 word,
166 ['s', 'o', 's']
167 | ['r', 'z', ..]
168 | ['n', 'g', ..]
169 | ['n', 'v', ..]
170 | ['x']
171 | ['x', 'b', 'o', 'x']
172 | ['h', 'e', 'i', 'r', ..]
173 | ['h', 'o', 'n', 'o', 'r', ..]
174 ) {
175 return true;
176 }
177
178 if matches!(
179 word,
180 ['j', 'u' | 'o', 'n', ..] | ['j', 'u', 'r', 'a' | 'i' | 'o', ..]
181 ) {
182 return false;
183 }
184
185 if matches!(word, ['x', '-' | '\'' | '.' | 'o' | 's', ..]) {
186 return true;
187 }
188
189 matches!(
190 word,
191 ['a', ..] | ['e', ..] | ['i', ..] | ['o', ..] | ['u', ..]
192 )
193}
194
195fn is_likely_acronym(word: &[char]) -> bool {
196 word.get(..3).is_some_and(|first_chars| {
199 first_chars
200 .iter()
201 .take(2)
202 .fold(0, |acc, char| acc + !char.is_vowel() as u8)
203 < 2
204 })
205}
206
#[cfg(test)]
mod tests {
    use super::AnA;
    use crate::linting::tests::assert_lint_count;

    /// "a HTML" is flagged once.
    #[test]
    fn detects_html_as_vowel() {
        assert_lint_count("Here is a HTML document.", AnA, 1);
    }

    /// "a LLM" is flagged once.
    #[test]
    fn detects_llm_as_vowel() {
        assert_lint_count("Here is a LLM document.", AnA, 1);
    }

    /// A hyphenated continuation after the initialism does not hide the lint.
    #[test]
    fn detects_llm_hyphen_as_vowel() {
        assert_lint_count("Here is a LLM-based system.", AnA, 1);
    }

    /// A capitalized consonant-initial word after "a" is accepted.
    #[test]
    fn capitalized_fourier() {
        assert_lint_count("Then, perform a Fourier transform.", AnA, 0);
    }

    /// "a once-over" is accepted.
    #[test]
    fn once_over() {
        assert_lint_count("give this a once-over.", AnA, 0);
    }

    /// Regression case named after issue 196: an inline code span after "an"
    /// produces no lint.
    #[test]
    fn issue_196() {
        assert_lint_count("This is formatted as an `ext4` file system.", AnA, 0);
    }

    /// "an" before a lowercase vowel-initial word is accepted.
    #[test]
    fn allows_lowercase_vowels() {
        assert_lint_count("not an error", AnA, 0);
    }

    /// "a" before a lowercase consonant-initial word is accepted.
    #[test]
    fn allows_lowercase_consonants() {
        assert_lint_count("not a crash", AnA, 0);
    }

    /// "a" before a lowercase vowel-initial word is flagged.
    #[test]
    fn disallows_lowercase_vowels() {
        assert_lint_count("not a error", AnA, 1);
    }

    /// "an" before a lowercase consonant-initial word is flagged.
    #[test]
    fn disallows_lowercase_consonants() {
        assert_lint_count("not an crash", AnA, 1);
    }

    /// "an" before a capitalized vowel-initial word is accepted.
    #[test]
    fn allows_uppercase_vowels() {
        assert_lint_count("not an Error", AnA, 0);
    }

    /// "a" before a capitalized consonant-initial word is accepted.
    #[test]
    fn allows_uppercase_consonants() {
        assert_lint_count("not a Crash", AnA, 0);
    }

    /// "a" before a capitalized vowel-initial word is flagged.
    #[test]
    fn disallows_uppercase_vowels() {
        assert_lint_count("not a Error", AnA, 1);
    }

    /// "an" before a capitalized consonant-initial word is flagged.
    #[test]
    fn disallows_uppercase_consonants() {
        assert_lint_count("not an Crash", AnA, 1);
    }

    /// A capitalized article at the start of a sentence is checked too; the
    /// later, correct "an object" adds no lint.
    #[test]
    fn disallows_a_interface() {
        let text = "A interface for an object that can perform linting actions.";
        assert_lint_count(text, AnA, 1);
    }

    /// Regression case named after issue 751: "a 52% approval rating" produces
    /// no lint.
    #[test]
    fn allow_issue_751() {
        assert_lint_count("He got a 52% approval rating.", AnA, 0);
    }

    /// "an MP" and "an MP3" are both accepted.
    #[test]
    fn allow_an_mp_and_an_mp3() {
        assert_lint_count("an MP and an MP3?", AnA, 0);
    }

    /// "a MP" and "a MP3" are both flagged.
    #[test]
    fn disallow_a_mp_and_a_mp3() {
        assert_lint_count("a MP and a MP3?", AnA, 2);
    }

    /// All-caps words: some are accepted with "a", two-letter ones with "an",
    /// and "a AM" is flagged.
    #[test]
    fn recognize_acronyms() {
        let accepted_with_a = [
            "using a MAC address",
            "a NASA spacecraft",
            "a NAT",
            "a REST API",
            "a LIBERO",
            "a README",
            "a LAN",
        ];
        for text in accepted_with_a {
            assert_lint_count(text, AnA, 0);
        }

        let accepted_with_an = [
            "an RA message",
            "an SI unit",
            "he is an MA of both Oxford and Cambridge",
            "in an FA Cup 6th Round match",
        ];
        for text in accepted_with_an {
            assert_lint_count(text, AnA, 0);
        }

        assert_lint_count("a AM transmitter", AnA, 1);
    }
}