1use std::borrow::Cow;
2
3use itertools::Itertools;
4
5use crate::char_ext::CharExt;
6use crate::linting::{Lint, LintKind, Linter, Suggestion};
7use crate::{Document, TokenStringExt};
8
/// Linter that flags an indefinite article (`a` / `an`) that does not agree
/// with the word following it.
///
/// The type carries no state, so it is cheap to copy and can be created with
/// `AnA` or `AnA::default()`.
#[derive(Debug, Default, Clone, Copy)]
pub struct AnA;
11
12impl Linter for AnA {
13 fn lint(&mut self, document: &Document) -> Vec<Lint> {
14 let mut lints = Vec::new();
15
16 for chunk in document.iter_chunks() {
17 for (first_idx, second_idx) in chunk.iter_word_indices().tuple_windows() {
18 if chunk[first_idx..second_idx].iter_unlintables().count() > 0
20 || chunk[first_idx + 1..second_idx]
21 .iter_word_like_indices()
22 .count()
23 > 0
24 {
25 continue;
26 }
27
28 let first = &chunk[first_idx];
29 let second = &chunk[second_idx];
30
31 let chars_first = document.get_span_content(&first.span);
32 let chars_second = document.get_span_content(&second.span);
33 let chars_second = chars_second
36 .split(|c| !c.is_alphanumeric())
37 .next()
38 .unwrap_or(chars_second);
39
40 let is_a_an = match chars_first {
41 ['a'] => Some(true),
42 ['A'] => Some(true),
43 ['a', 'n'] => Some(false),
44 ['A', 'n'] => Some(false),
45 _ => None,
46 };
47
48 let Some(a_an) = is_a_an else {
49 continue;
50 };
51
52 let should_be_a_an = !starts_with_vowel(chars_second);
53
54 if a_an != should_be_a_an {
55 let replacement = match a_an {
56 true => vec!['a', 'n'],
57 false => vec!['a'],
58 };
59
60 lints.push(Lint {
61 span: first.span,
62 lint_kind: LintKind::Miscellaneous,
63 suggestions: vec![Suggestion::replace_with_match_case(
64 replacement,
65 chars_first,
66 )],
67 message: "Incorrect indefinite article.".to_string(),
68 priority: 31,
69 })
70 }
71 }
72 }
73
74 lints
75 }
76
77 fn description(&self) -> &'static str {
78 "A rule that looks for incorrect indefinite articles. For example, `this is an mule` would be flagged as incorrect."
79 }
80}
81
/// Returns `word` in lowercase.
///
/// When `word` contains no uppercase character the input slice is returned
/// borrowed; otherwise a lowercased copy is allocated. A single character may
/// lowercase to several characters, so the owned result can be longer than
/// the input.
fn to_lower_word(word: &[char]) -> Cow<'_, [char]> {
    let has_uppercase = word.iter().any(|c| c.is_uppercase());
    if !has_uppercase {
        return Cow::Borrowed(word);
    }

    let mut lowered = Vec::with_capacity(word.len());
    for ch in word {
        lowered.extend(ch.to_lowercase());
    }
    Cow::Owned(lowered)
}
93
94fn starts_with_vowel(word: &[char]) -> bool {
100 let is_likely_initialism = word.iter().all(|c| !c.is_alphabetic() || c.is_uppercase());
101
102 if is_likely_initialism && !word.is_empty() && !is_likely_acronym(word) {
103 return matches!(
104 word[0],
105 'A' | 'E' | 'F' | 'H' | 'I' | 'L' | 'M' | 'N' | 'O' | 'R' | 'S' | 'X'
106 );
107 }
108
109 let word = to_lower_word(word);
110 let word = word.as_ref();
111
112 if matches!(word, ['e', 'u', 'l', 'e', ..]) {
113 return true;
114 }
115
116 if matches!(
117 word,
118 [] | ['u', 'k', ..]
119 | ['e', 'u', 'p', 'h', ..]
120 | ['e', 'u', 'g' | 'l' | 'c', ..]
121 | ['o', 'n', 'e']
122 | ['o', 'n', 'c', 'e']
123 ) {
124 return false;
125 }
126
127 if matches!(word, |['h', 'o', 'u', 'r', ..]| ['h', 'o', 'n', ..]
128 | ['u', 'n', 'i', 'n' | 'm', ..]
129 | ['u', 'n', 'a' | 'u', ..]
130 | ['h', 'e', 'r', 'b', ..]
131 | ['u', 'r', 'b', ..]
132 | ['i', 'n', 't', ..])
133 {
134 return true;
135 }
136
137 if matches!(word, ['u', 'n' | 's', 'i' | 'a' | 'u', ..]) {
138 return false;
139 }
140
141 if matches!(word, ['u', 'n', ..]) {
142 return true;
143 }
144
145 if matches!(word, ['u', 'r', 'g', ..]) {
146 return true;
147 }
148
149 if matches!(word, ['u', 't', 't', ..]) {
150 return true;
151 }
152
153 if matches!(
154 word,
155 ['u', 't' | 'r' | 'n', ..] | ['e', 'u', 'r', ..] | ['u', 'w', ..] | ['u', 's', 'e', ..]
156 ) {
157 return false;
158 }
159
160 if matches!(word, ['o', 'n', 'e', 'a' | 'e' | 'i' | 'u', 'l' | 'd', ..]) {
161 return true;
162 }
163
164 if matches!(word, ['o', 'n', 'e', 'a' | 'e' | 'i' | 'u' | '-' | 's', ..]) {
165 return false;
166 }
167
168 if matches!(
169 word,
170 ['s', 'o', 's']
171 | ['r', 'z', ..]
172 | ['n', 'g', ..]
173 | ['n', 'v', ..]
174 | ['x']
175 | ['x', 'b', 'o', 'x']
176 | ['h', 'e', 'i', 'r', ..]
177 | ['h', 'o', 'n', 'o', 'r', ..]
178 ) {
179 return true;
180 }
181
182 if matches!(
183 word,
184 ['j', 'u' | 'o', 'n', ..] | ['j', 'u', 'r', 'a' | 'i' | 'o', ..]
185 ) {
186 return false;
187 }
188
189 if matches!(word, ['x', '-' | '\'' | '.' | 'o' | 's', ..]) {
190 return true;
191 }
192
193 matches!(
194 word,
195 ['a', ..] | ['e', ..] | ['i', ..] | ['o', ..] | ['u', ..]
196 )
197}
198
199fn is_likely_acronym(word: &[char]) -> bool {
200 word.get(..3).is_some_and(|first_chars| {
203 first_chars
204 .iter()
205 .take(2)
206 .fold(0, |acc, char| acc + !char.is_vowel() as u8)
207 < 2
208 })
209}
210
#[cfg(test)]
mod tests {
    use super::AnA;
    use crate::linting::tests::assert_lint_count;

    /// Asserts that `AnA` raises no lint for `text`.
    fn accepts(text: &str) {
        assert_lint_count(text, AnA, 0);
    }

    /// Asserts that `AnA` raises exactly `expected` lints for `text`.
    fn flags(text: &str, expected: usize) {
        assert_lint_count(text, AnA, expected);
    }

    #[test]
    fn detects_html_as_vowel() {
        flags("Here is a HTML document.", 1);
    }

    #[test]
    fn detects_llm_as_vowel() {
        flags("Here is a LLM document.", 1);
    }

    #[test]
    fn detects_llm_hyphen_as_vowel() {
        flags("Here is a LLM-based system.", 1);
    }

    #[test]
    fn detects_euler_as_vowel() {
        accepts("This is an Euler brick.");
        accepts("The graph has an Eulerian tour.");
    }

    #[test]
    fn capitalized_fourier() {
        accepts("Then, perform a Fourier transform.");
    }

    #[test]
    fn once_over() {
        accepts("give this a once-over.");
    }

    #[test]
    fn issue_196() {
        accepts("This is formatted as an `ext4` file system.");
    }

    #[test]
    fn allows_lowercase_vowels() {
        accepts("not an error");
    }

    #[test]
    fn allows_lowercase_consonants() {
        accepts("not a crash");
    }

    #[test]
    fn disallows_lowercase_vowels() {
        flags("not a error", 1);
    }

    #[test]
    fn disallows_lowercase_consonants() {
        flags("not an crash", 1);
    }

    #[test]
    fn allows_uppercase_vowels() {
        accepts("not an Error");
    }

    #[test]
    fn allows_uppercase_consonants() {
        accepts("not a Crash");
    }

    #[test]
    fn disallows_uppercase_vowels() {
        flags("not a Error", 1);
    }

    #[test]
    fn disallows_uppercase_consonants() {
        flags("not an Crash", 1);
    }

    #[test]
    fn disallows_a_interface() {
        flags(
            "A interface for an object that can perform linting actions.",
            1,
        );
    }

    #[test]
    fn allow_issue_751() {
        accepts("He got a 52% approval rating.");
    }

    #[test]
    fn allow_an_mp_and_an_mp3() {
        accepts("an MP and an MP3?");
    }

    #[test]
    fn disallow_a_mp_and_a_mp3() {
        flags("a MP and a MP3?", 2);
    }

    #[test]
    fn recognize_acronyms() {
        // All-caps words read as a word take the article their sound calls for.
        let pronounced_as_words = [
            "using a MAC address",
            "a NASA spacecraft",
            "a NAT",
            "a REST API",
            "a LIBERO",
            "a README",
            "a LAN",
        ];
        for text in pronounced_as_words {
            accepts(text);
        }

        // Two-letter all-caps words are spelled out letter by letter.
        let spelled_out = [
            "an RA message",
            "an SI unit",
            "he is an MA of both Oxford and Cambridge",
            "in an FA Cup 6th Round match",
        ];
        for text in spelled_out {
            accepts(text);
        }

        flags("a AM transmitter", 1);
    }
}