syllable/
lib.rs

1use std::{cmp, collections::HashMap};
2
3mod data;
4
/// Syllable counter backed by a built-in word table and a per-instance cache.
///
/// Lookups consult `dictionary` first and `cache` second; words found in
/// neither are counted heuristically and then remembered in `cache`.
#[derive(Debug, Clone)]
pub struct Counter {
    /// Known syllable counts, keyed by word, loaded from `data::SYLLABLE_DATA`.
    dictionary: HashMap<&'static str, usize>,
    /// Heuristically computed counts, keyed by the normalized word.
    cache: HashMap<String, usize>,
}
10
11impl Counter {
12    pub fn new() -> Self {
13        let dictionary: HashMap<&'static str, usize> = data::SYLLABLE_DATA
14            .iter()
15            .map(|&(word, count)| (word, count))
16            .collect();
17
18        Self {
19            dictionary,
20            cache: HashMap::new(),
21        }
22    }
23
24    /// Count the number of syllables in a word.
25    ///
26    /// Returns zero in the event of a problem with the word.
27    pub fn count(&mut self, word: &str) -> usize {
28        let word = word
29            .trim_matches(|u: char| u.is_ascii_punctuation())
30            .to_ascii_lowercase();
31
32        if word.is_empty() || word.bytes().any(|u| !u.is_ascii_alphabetic()) {
33            return 0;
34        }
35
36        if let Some(known_count) = self.cached_count(&*word) {
37            return known_count;
38        }
39
40        let syllable_count = get_syllable_count(&word);
41        self.cache.insert(word, syllable_count);
42        syllable_count
43    }
44
45    fn cached_count(&self, word: &str) -> Option<usize> {
46        self.dictionary
47            .get(word)
48            .or_else(|| self.cache.get(word))
49            .copied()
50    }
51}
52
53impl Default for Counter {
54    fn default() -> Self {
55        Counter::new()
56    }
57}
58
/// Estimate the number of syllables in `word` using a vowel-group heuristic.
///
/// The algorithm is a port of syllapy's fallback counter:
///
/// 1. Count every vowel (`a e i o u y`) that starts a run of vowels.
/// 2. Subtract one for a trailing (usually silent) `e`.
/// 3. Add one back for a trailing `le` preceded by a consonant ("ta-ble").
/// 4. Never report fewer than one syllable.
///
/// `word` is expected to be lowercase; uppercase letters are treated as
/// consonants. An empty `word` yields `0`.
fn get_syllable_count(word: &str) -> usize {
    // Original syllapy count algo copied for reference:
    //
    // syllable_count = 0
    // vowels = "aeiouy"
    // if word[0] in vowels:
    //     syllable_count += 1
    // for index in range(1, len(word)):
    //     if word[index] in vowels and word[index - 1] not in vowels:
    //         syllable_count += 1
    // if word.endswith("e"):
    //     syllable_count -= 1
    // if word.endswith("le") and len(word) > 2 and word[-3] not in vowels:
    //     syllable_count += 1
    // if syllable_count == 0:
    //     syllable_count += 1
    // return syllable_count

    fn is_vowel(u: char) -> bool {
        matches!(u, 'a' | 'e' | 'i' | 'o' | 'u' | 'y')
    }

    let characters: Vec<char> = word.chars().collect();

    // Guard the empty string instead of panicking on an out-of-bounds index.
    let first = match characters.first() {
        Some(&c) => c,
        None => return 0,
    };

    // A leading vowel opens the first vowel group.
    let mut syllable_count = usize::from(is_vowel(first));

    // Every consonant -> vowel transition opens another vowel group.
    syllable_count += characters
        .windows(2)
        .filter(|pair| is_vowel(pair[1]) && !is_vowel(pair[0]))
        .count();

    // Cannot underflow: a word ending in the vowel 'e' has at least one
    // vowel group counted above.
    if word.ends_with('e') {
        syllable_count -= 1;
    }

    // Python's `word[-3]` is the character just before the "le" suffix, i.e.
    // index `len - 3`. Use the char count (not the byte length) so the index
    // always refers to the same sequence being indexed.
    let length = characters.len();
    if word.ends_with("le") && length > 2 && !is_vowel(characters[length - 3]) {
        syllable_count += 1;
    }

    cmp::max(1, syllable_count)
}
110
#[cfg(test)]
mod tests {
    use crate::Counter;

    /// Building a counter (which loads the dictionary) must not panic.
    #[test]
    fn can_initialize() {
        let _ = Counter::new();
    }

    /// Table-driven check of `Counter::count` across punctuation handling,
    /// rejected inputs, and case-insensitive lookups.
    #[test]
    fn can_count() {
        // NOTE: punctuation coverage below is representative, not exhaustive.
        static TEST_CASES: &[(&str, usize)] = &[
            // Leading/trailing ASCII punctuation is stripped before counting.
            ("dog!!!!!", 1),
            ("!!!dog", 1),
            ("\"dog\"", 1),
            ("(dog),", 1),
            // Punctuation inside the word is not stripped, so the word is rejected.
            ("d-o-g", 0),
            ("d'og", 0),
            // Nothing is left once the punctuation is stripped.
            ("!!!", 0),
            // Digits and whitespace make the word invalid.
            ("d0g", 0),
            ("4dog", 0),
            ("dog123", 0),
            ("", 0),
            (" ", 0),
            ("because", 2),
            ("woman", 2),
            ("international", 5),
            ("ostentatious", 4),
            // Lookups are case-insensitive.
            ("Norway", 2),
            ("norway", 2),
            ("Ohio", 3),
            ("ohio", 3),
        ];

        let mut counter = Counter::new();
        for &(word, expected) in TEST_CASES {
            let actual = counter.count(word);
            assert_eq!(
                actual, expected,
                "{} (actual: {}; expected: {})",
                word, actual, expected
            );
        }
    }
}