verba 0.5.1

A library for working with Latin words.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
use unicode_segmentation::UnicodeSegmentation;

use std::fmt;

/// The grammatical gender of a Latin noun or adjective.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Gender {
    Masculine,
    Feminine,
    Neuter,
}

/// Formats a gender as its lowercase English name, e.g. `masculine`.
impl fmt::Display for Gender {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the label first so the formatter is written to in one place.
        let label = match *self {
            Gender::Masculine => "masculine",
            Gender::Feminine => "feminine",
            Gender::Neuter => "neuter",
        };

        f.write_str(label)
    }
}

/// The grammatical cases a noun or adjective can be declined into.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Case {
    Nominative,
    Genitive,
    Dative,
    Accusative,
    Ablative,
    Vocative,
    // The locative case is not part of this enum (variant left disabled):
    // Locative,
}

/// Formats a case as its lowercase English name, e.g. `nominative`.
impl fmt::Display for Case {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the label first so the formatter is written to in one place.
        let label = match *self {
            Case::Nominative => "nominative",
            Case::Genitive => "genitive",
            Case::Dative => "dative",
            Case::Accusative => "accusative",
            Case::Ablative => "ablative",
            Case::Vocative => "vocative",
        };

        f.write_str(label)
    }
}

/// Returns true if `nominative` ends in -ius or -ium.
///
/// Regular second declension masculine nouns in -ius and neuter nouns in
/// -ium decline slightly differently from other second declension nouns, so
/// callers use this check to pick the right endings.
///
/// # Warning
///
/// This is a plain suffix test. The result is only meaningful when the
/// function is called with a regular second declension noun.
pub(crate) fn is_ius(nominative: &str) -> bool {
    ["ius", "ium"]
        .iter()
        .any(|suffix| nominative.ends_with(*suffix))
}

/// Returns true if `nominative` ends in -r.
///
/// Regular second declension masculine nouns ending in -r form their
/// singular vocative differently from other second declension nouns, so
/// callers use this check to pick the right form.
///
/// # Warning
///
/// This only inspects the final character. The result is only meaningful
/// when the function is called with a regular second declension noun.
pub(crate) fn is_r(nominative: &str) -> bool {
    nominative.chars().next_back() == Some('r')
}

/// Attempts to determine whether a third declension noun or adjective is a
/// parisyllabic i-stem. The rule checked is taken from Wheelock's Latin:
///
/// 1. Masculine and feminine nouns and adjectives whose nominative ends in
///    -is or -ēs and whose nominative and genitive forms have the same
///    number of syllables.
///
/// Neuter words never match this rule.
///
/// # Warning
///
/// This is a heuristic and will not be 100 percent accurate; human language
/// is always inconsistent. If it produces too many false positives it may be
/// removed in favour of storing the i-stem flag in the word database, so
/// avoid depending on it too heavily.
pub fn is_i_stem_parisyllabic(nominative: &str, genitive: &str, gender: Gender) -> bool {
    // Only masculine and feminine words are covered by the parisyllabic rule.
    let gender_qualifies = match gender {
        Gender::Masculine | Gender::Feminine => true,
        Gender::Neuter => false,
    };

    // The nominative has to end with -is or -ēs.
    let ending_qualifies = ["is", "ēs"]
        .iter()
        .any(|ending| nominative.ends_with(*ending));

    // Finally, nominative and genitive must have equally many syllables.
    gender_qualifies && ending_qualifies && syllables(nominative) == syllables(genitive)
}

/// Attempts to determine whether a third declension noun or adjective is an
/// i-stem because of its consonant ending. Only masculine and feminine words
/// can match; neuter words always yield `false`. Two checks are applied:
///
/// 1. Monosyllabic nominatives ending in -s or -x whose stem ends in two
///    consonants (the rule given in Wheelock's Latin).
///
/// 2. Nominatives of more than one syllable ending in -ns or -rs.
///
/// A stem shorter than two graphemes cannot end in two consonants, so such a
/// stem never satisfies the first check.
///
/// # Warning
///
/// This is a heuristic and will not be 100 percent accurate; human language
/// is always inconsistent. If it produces too many false positives it may be
/// removed in favour of storing the i-stem flag in the word database, so
/// avoid depending on it too heavily.
pub fn is_i_stem_two_consonant_ending(nominative: &str, stem: &str, gender: Gender) -> bool {
    match gender {
        Gender::Neuter => false,
        Gender::Masculine | Gender::Feminine => {
            let syllable_count = syllables(nominative);

            if syllable_count == 1 {
                if !(nominative.ends_with('s') || nominative.ends_with('x')) {
                    return false;
                }

                // Walk the stem backwards by grapheme (not by byte) so that
                // multibyte letters such as 'ū' count as one letter. Both of
                // the last two graphemes must exist and be consonants;
                // `take(2).all(..)` would be vacuously true for short stems.
                let mut tail = stem.graphemes(true).rev();

                match (tail.next(), tail.next()) {
                    (Some(last), Some(penultimate)) => {
                        !is_vowel(last) && !is_vowel(penultimate)
                    }
                    _ => false,
                }
            } else {
                // Longer words qualify purely on an -ns or -rs nominative.
                syllable_count > 1
                    && (nominative.ends_with("ns") || nominative.ends_with("rs"))
            }
        }
    }
}

/// Attempts to determine whether a third declension noun or adjective is a
/// neuter i-stem. The rule checked is taken from Wheelock's Latin:
///
/// 1. Neuter nouns and adjectives whose nominative ends in -al, -ar, or -e.
///
/// Masculine and feminine words never match this rule.
///
/// # Warning
///
/// This is a heuristic and will not be 100 percent accurate; human language
/// is always inconsistent. If it produces too many false positives it may be
/// removed in favour of storing the i-stem flag in the word database, so
/// avoid depending on it too heavily.
pub fn is_i_stem_neuter(nominative: &str, gender: Gender) -> bool {
    match gender {
        Gender::Neuter => ["al", "ar", "e"]
            .iter()
            .any(|ending| nominative.ends_with(*ending)),
        Gender::Masculine | Gender::Feminine => false,
    }
}

/// Returns true if `character` is exactly one of the lowercase vowels a, e,
/// i, o, u or their macron-marked forms ā, ē, ī, ō, ū; otherwise returns
/// false.
///
/// The argument is a string slice so that a whole grapheme can be passed in.
/// Anything else, including uppercase letters, the empty string and
/// multi-letter strings, is not a vowel.
pub(crate) fn is_vowel(character: &str) -> bool {
    const VOWELS: [&str; 10] = ["a", "ā", "e", "ē", "i", "ī", "o", "ō", "u", "ū"];

    VOWELS.iter().any(|vowel| *vowel == character)
}

/// The singular genitive and dative endings for fifth declension nouns differ
/// depending on whether the stem ends with a consonant or a vowel. This 
/// function determines if a string reference ends with a vowel. While it is 
/// primarily aimed at declining fifth declension nouns, it will work for any 
/// string. 
pub(crate) fn does_end_with_vowel(stem: &str) -> bool {
    let mut graphemes = stem.graphemes(true).rev();

    match graphemes.next() {
        Some(grapheme) => is_vowel(grapheme),
        None => false,
    }
}

/// Counts the syllables in `word`.
///
/// To verify whether a regular third declension noun is a pure i-stem, its
/// syllables must be counted. That would be straightforward if it weren't
/// for the fact that Classical Latin has six diphthongs: ae, au, ei, eu, oe,
/// and ui.
///
/// Every vowel starts a new syllable. When a vowel and the grapheme right
/// after it form a diphthong according to `is_diphthong`, the pair is
/// counted as a single syllable.
pub(crate) fn syllables(word: &str) -> usize {
    // A peekable iterator lets us look one grapheme ahead without consuming it.
    let mut graphemes = word.graphemes(true).peekable();
    let mut total: usize = 0;

    loop {
        let current = match graphemes.next() {
            Some(grapheme) => grapheme,
            None => break,
        };

        // Consonants never contribute to the count.
        if !is_vowel(current) {
            continue;
        }

        total += 1;

        // The vowel may be the first half of a diphthong. If so, consume the
        // second half now so it is not counted as a syllable of its own.
        let completes_diphthong = graphemes
            .peek()
            .map_or(false, |&following| is_diphthong(current, following));

        if completes_diphthong {
            graphemes.next();
        }
    }

    total
}

/// Takes two graphemes and determines whether they form a diphthong when
/// combined, i.e. whether `first` followed by `second` is one of the six
/// Classical Latin diphthongs: ae, au, ei, eu, oe, or ui.
///
/// Only unmarked vowels can be part of a diphthong. A vowel carrying a
/// macron is a long vowel in its own right, so sequences such as "oē" in
/// poēta or "eī" in deī are two separate syllables and yield `false`.
///
/// Any other input, including consonants and empty strings, yields `false`.
pub(crate) fn is_diphthong(first: &str, second: &str) -> bool {
    match (first, second) {
        ("a", "e")
        | ("a", "u")
        | ("e", "i")
        | ("e", "u")
        | ("o", "e")
        | ("u", "i") => true,
        _ => false,
    }
}

/// Attempts to detect whether a third declension noun is an i-stem.
///
/// Third declension nouns are either consonant stems or i-stems, and an
/// i-stem declines differently in a few cases.
///
/// The decision is made with the heuristics described in Wheelock's Latin.
/// A noun is reported as an i-stem as soon as one of these checks matches,
/// tried in this order:
///
/// 1. Parisyllabic: masculine and feminine nouns and adjectives that end in
///    -is or -es and have the same number of syllables in both their
///    nominative and genitive forms.
///
/// 2. Two consonant ending: masculine and feminine nouns and adjectives that
///    end in -s or -x with a stem ending in two consonants.
///
/// 3. Neuter: neuter nouns and adjectives that end in -al, -ar, or -e.
///
/// # Warning
///
/// These are heuristics, not hard rules. There are third declension nouns,
/// such as canis, canis, that these heuristics consider i-stems but that
/// decline as consonant stems.
///
/// # Example
/// ```
/// use verba::noun as N;
/// 
/// assert_eq!(N::is_i_stem("amnis", "amnis", "amn", N::Gender::Masculine), true);
/// assert_eq!(N::is_i_stem("animal", "animālis", "animāl", N::Gender::Neuter), true);
/// assert_eq!(N::is_i_stem("pars", "partis", "part", N::Gender::Feminine), true);
/// ``` 
pub fn is_i_stem(nominative: &str, genitive: &str, stem: &str, gender: Gender) -> bool {
    if is_i_stem_parisyllabic(nominative, genitive, gender) {
        return true;
    }

    if is_i_stem_two_consonant_ending(nominative, stem, gender) {
        return true;
    }

    is_i_stem_neuter(nominative, gender)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_is_vowel() {
        for vowel in "aāeēiīoōuū".graphemes(true) {
            assert!(is_vowel(vowel));
        }

        for consonant in "bcdfghjklmnpqrstvwxyz".graphemes(true) {
            assert!(!is_vowel(consonant));
        }
    }

    #[test]
    fn test_syllables() {
        // Animalia, the plural nominative of the third declension noun
        // animal, makes a good baseline because it has many syllables.
        assert_eq!(syllables("animalia"), 5);

        // Had the Romans spelled it aenimalia, the leading ae would be a
        // diphthong and count once, so the total stays at five.
        assert_eq!(syllables("aenimalia"), 5);

        // Squeezing an e in front of the first i also produces a diphthong
        // (ei), so the total is still five.
        assert_eq!(syllables("aneimalia"), 5);

        // With the e and the i swapped there is no diphthong any more, so
        // the extra vowel adds a syllable.
        assert_eq!(syllables("aniemalia"), 6);
    }

    // Parisyllabic i-stems: nominative in -is or -ēs with as many syllables
    // as the genitive.

    #[test]
    fn test_is_i_stem_hostis() {
        assert!(is_i_stem_parisyllabic("hostis", "hostis", Gender::Masculine));
    }

    #[test]
    fn test_is_i_stem_navis() {
        assert!(is_i_stem_parisyllabic("nāvis", "nāvis", Gender::Feminine));
    }

    #[test]
    fn test_is_i_stem_moles() {
        assert!(is_i_stem_parisyllabic("mōlēs", "mōlis", Gender::Feminine));
    }

    #[test]
    fn test_is_i_stem_civis() {
        assert!(is_i_stem_parisyllabic("cīvis", "cīvis", Gender::Masculine));
    }

    #[test]
    fn test_is_i_stem_nubes() {
        assert!(is_i_stem_parisyllabic("nūbēs", "nūbis", Gender::Feminine));
    }

    // Neuter i-stems: nominative in -al, -ar, or -e.

    #[test]
    fn test_is_i_stem_animal() {
        assert!(is_i_stem_neuter("animal", Gender::Neuter));
    }

    #[test]
    fn test_is_i_stem_exemplar() {
        assert!(is_i_stem_neuter("exemplar", Gender::Neuter));
    }

    #[test]
    fn test_is_i_stem_mare() {
        assert!(is_i_stem_neuter("mare", Gender::Neuter));
    }

    // Two consonant endings: nominative in -s or -x with a stem that ends in
    // two consonants.

    #[test]
    fn test_is_i_stem_ars() {
        assert!(is_i_stem_two_consonant_ending("ars", "art", Gender::Feminine));
    }

    #[test]
    fn test_is_i_stem_dens() {
        assert!(is_i_stem_two_consonant_ending("dēns", "dent", Gender::Masculine));
    }

    #[test]
    fn test_is_i_stem_nox() {
        assert!(is_i_stem_two_consonant_ending("nox", "noct", Gender::Feminine));
    }

    #[test]
    fn test_is_i_stem_urbs() {
        assert!(is_i_stem_two_consonant_ending("urbs", "urb", Gender::Feminine));
    }

    /// Regression test: is_i_stem_two_consonant_ending used to report
    /// virtūs, virtūtis as a mixed i-stem. It relied on String.len() to skip
    /// all but the last two characters, but 'ū' is a multibyte grapheme and
    /// String.len() counts bytes, so only the last character was checked.
    ///
    /// This test makes sure that bug doesn't creep back in.
    #[test]
    fn test_is_i_stem_virtus() {
        assert!(!is_i_stem_two_consonant_ending("virtūs", "virtūt", Gender::Feminine));
    }

    #[test]
    fn test_is_i_stem() {
        assert!(
            is_i_stem("amnis", "amnis", "amn", Gender::Masculine),
            "is_i_stem failed to detect the pure i-stem masculine noun amnis, amnis."
        );
        assert!(
            is_i_stem("animal", "animālis", "animāl", Gender::Neuter),
            "is_i_stem failed to detect the pure i-stem neuter noun animal, animālis."
        );
        assert!(
            is_i_stem("pars", "partis", "part", Gender::Feminine),
            "is_i_stem failed to detect the mixed i-stem feminine noun pars, partis."
        );
    }
}