1use std::borrow::Cow;
2
3use itertools::Itertools;
4
5use crate::case::Case::Upper;
6use crate::char_ext::CharExt;
7use crate::{CaseIterExt, Dialect};
8
/// The kind of sound a word begins with when spoken aloud, as classified by
/// [`starts_with_vowel`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitialSound {
    /// The word begins with a vowel sound.
    Vowel,
    /// The word begins with a consonant sound.
    Consonant,
    /// Both readings are accepted (returned for e.g. `LED`, `SQL`, `URL`).
    Either,
}
15
16pub fn starts_with_vowel(word: &[char], dialect: Dialect) -> Option<InitialSound> {
22 if word.is_empty() {
23 return None;
24 }
25
26 if matches!(word, ['L', 'E', 'D'] | ['S', 'Q', 'L'] | ['U', 'R', 'L']) {
27 return Some(InitialSound::Either);
28 }
29
30 let word = {
35 let word_casing = word.get_casing_unfiltered();
36 match word_casing.as_slice() {
37 [Some(first_char_case), Some(Upper), ..] => {
39 &word[0..word_casing
40 .iter()
41 .position(|c| *c != Some(*first_char_case))
42 .unwrap_or(word.len())]
43 }
44 _ => word,
46 }
47 };
48
49 let is_likely_initialism = word.iter().all(|c| !c.is_alphabetic() || c.is_uppercase());
50
51 if word.len() == 1 || (is_likely_initialism && !is_likely_acronym(word)) {
52 return Some(
53 if matches!(
54 word[0].to_ascii_uppercase(),
55 'A' | 'E' | 'F' | 'H' | 'I' | 'L' | 'M' | 'N' | 'O' | 'R' | 'S' | 'X'
56 ) {
57 InitialSound::Vowel
58 } else {
59 InitialSound::Consonant
60 },
61 );
62 }
63
64 let word = to_lower_word(word);
65 let word = word.as_ref();
66
67 if matches!(word, ['u', 'b', 'i', ..]) {
68 return Some(InitialSound::Either);
69 }
70
71 if matches!(word, ['e', 'u', 'l', 'e', ..]) {
72 return Some(InitialSound::Vowel);
73 }
74
75 if matches!(
76 word,
77 ['u', 'k', ..]
78 | ['u', 'd', 'e', ..] | ['e', 'u', 'p', 'h', ..]
80 | ['e', 'u', 'g' | 'l' | 'c', ..]
81 | ['o', 'n', 'e', ..]
82 | ['o', 'n', 'c', 'e']
83 ) {
84 return Some(InitialSound::Consonant);
85 }
86
87 if matches!(
88 word,
89 ['h', 'o', 'u', 'r', ..]
90 | ['u', 'n', 'i', 'n' | 'm', ..]
91 | ['u', 'n', 'a' | 'u', ..]
92 | ['u', 'r', 'b', ..]
93 | ['i', 'n', 't', ..]
94 ) {
95 return Some(InitialSound::Vowel);
96 }
97
98 if matches!(word, ['h', 'e', 'r', 'b', ..] if dialect == Dialect::American || dialect == Dialect::Canadian)
99 {
100 return Some(InitialSound::Vowel);
101 }
102
103 if matches!(word, ['u', 'n' | 's', 'i' | 'a' | 'u', ..]) {
104 return Some(InitialSound::Consonant);
105 }
106
107 if matches!(word, ['u', 'n', ..]) {
108 return Some(InitialSound::Vowel);
109 }
110
111 if matches!(word, ['u', 'r', 'g', ..]) {
112 return Some(InitialSound::Vowel);
113 }
114
115 if matches!(word, ['u', 't', 't', ..]) {
116 return Some(InitialSound::Vowel);
117 }
118
119 if matches!(
120 word,
121 ['u', 't' | 'r' | 'n', ..] | ['e', 'u', 'r', ..] | ['u', 'w', ..] | ['u', 's', 'e', ..]
122 ) {
123 return Some(InitialSound::Consonant);
124 }
125
126 if matches!(word, ['o', 'n', 'e', 'a' | 'e' | 'i' | 'u', 'l' | 'd', ..]) {
127 return Some(InitialSound::Vowel);
128 }
129
130 if matches!(word, ['o', 'n', 'e', 'a' | 'e' | 'i' | 'u' | '-' | 's', ..]) {
131 return Some(InitialSound::Consonant);
132 }
133
134 if matches!(
135 word,
136 ['s', 'o', 's']
137 | ['r', 'z', ..]
138 | ['n', 'g', ..]
139 | ['n', 'v', ..]
140 | ['x', 'b', 'o', 'x']
141 | ['h', 'e', 'i', 'r', ..]
142 | ['h', 'o', 'n', 'o', 'r', ..]
143 | ['h', 'o', 'n', 'e', 's', ..]
144 ) {
145 return Some(InitialSound::Vowel);
146 }
147
148 if matches!(
149 word,
150 ['j', 'u' | 'o', 'n', ..] | ['j', 'u', 'r', 'a' | 'i' | 'o', ..]
151 ) {
152 return Some(InitialSound::Consonant);
153 }
154
155 if matches!(word, ['x', '-' | '\'' | '.' | 'o' | 's', ..]) {
156 return Some(InitialSound::Vowel);
157 }
158
159 if word[0].is_vowel() {
160 return Some(InitialSound::Vowel);
161 }
162
163 Some(InitialSound::Consonant)
164}
165
/// Returns `word` converted to lower case.
///
/// When `word` contains no upper-case character it is handed back borrowed, so no allocation
/// takes place. Otherwise a new buffer is built; because a single character's lower-case form
/// may consist of several characters, the result can be longer than the input.
fn to_lower_word(word: &[char]) -> Cow<'_, [char]> {
    let has_uppercase = word.iter().any(|ch| ch.is_uppercase());
    if !has_uppercase {
        return Cow::Borrowed(word);
    }

    let mut lowered: Vec<char> = Vec::with_capacity(word.len());
    for ch in word {
        lowered.extend(ch.to_lowercase());
    }
    Cow::Owned(lowered)
}
177
178fn is_likely_acronym(word: &[char]) -> bool {
179 fn word_contains_false_positive_sequence(word: &[char]) -> bool {
181 let likely_false_positive_sequences = [['V', 'C']];
182 for fp_sequence in likely_false_positive_sequences {
183 if word
184 .windows(fp_sequence.len())
185 .any(|subslice| subslice == fp_sequence)
186 {
187 return true;
188 }
189 }
190 false
191 }
192
193 const MIN_LEN: usize = 3;
195
196 if let Some(first_chars) = word.get(..MIN_LEN)
197 && first_chars.iter().copied().all(char::is_alphabetic)
199 && !word_contains_false_positive_sequence(word)
200 {
201 let vowel_map = first_chars
202 .iter()
203 .map(CharExt::is_vowel)
204 .collect_array::<MIN_LEN>()
205 .unwrap();
206 matches!(vowel_map, [false, true, false] | [false, true, true])
207 } else {
208 false
209 }
210}