// verba 0.5.1 - Docs.rs

mod endings;

pub mod regular;

use unicode_normalization::{UnicodeNormalization};
use unicode_segmentation::{UnicodeSegmentation};

use crate::unicode as U;
use crate::adjective::{Adjective};
use crate::adjective as A;
use crate::decline as D;

pub use crate::verb::regular::{Regular};
pub use crate::inflection::{Number};

/// Interface for verbs that expose their stems and conjugation group and can
/// be conjugated.
pub trait Verb {
    /// Returns the verb's present stem, if there is one to return.
    fn present_stem(&self) -> Option<&str>;
    /// Returns the verb's perfect stem, if there is one to return.
    fn perfect_stem(&self) -> Option<&str>;
    /// Returns the verb's conjugation [`Group`], if there is one to return.
    fn group(&self) -> Option<Group>;
    /// Conjugates the verb for the given person, number, tense, voice, and
    /// mood.
    ///
    /// The result is a [`Conjugation`]: either a complete list of forms or a
    /// passive perfect whose adjective still has to be declined.
    // NOTE(review): presumably `None` means the requested form doesn't exist
    // for this verb; confirm against the implementors.
    fn conjugate(&self, person: Person, number: Number, tense: Tense, voice: Voice, mood: Mood) -> Option<Conjugation>;
}

/// Verb conjugations can be a bit tricky. While a verb conjugation is usually
/// a combination of a stem and a personal ending, the passive perfect forms of
/// indicative and subjunctive conjugations involve declining an adjective
/// form.
///
/// To represent these two possible options, the conjugate function of the
/// [`Verb`] trait returns a `Conjugation`, which can be either a complete
/// conjugation or a reference to a verb's adjective and the appropriate verb
/// form.
pub enum Conjugation<'a> {
    /// A borrowed adjective paired with a verb form. [`Conjugation::decline`]
    /// declines the adjective and appends the verb form to every result.
    PassivePerfect(&'a A::Regular, &'a str),
    /// The finished conjugated forms.
    Complete(Vec<String>),
}

/// Voice selector passed to [`Verb::conjugate`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Voice {
    /// The active voice.
    Active,
    /// The passive voice.
    Passive,
}

/// Tense selector passed to [`Verb::conjugate`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Tense {
    /// The present tense.
    Present,
    /// The imperfect tense.
    Imperfect,
    /// The future tense.
    Future,
    /// The perfect tense.
    Perfect,
    /// The pluperfect tense.
    Pluperfect,
    /// The future perfect tense.
    FuturePerfect,
}

/// Mood selector passed to [`Verb::conjugate`].
///
/// The infinitive is selected through this enum as well, alongside the
/// indicative, subjunctive, and imperative.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Mood {
    /// The indicative mood.
    Indicative,
    /// The subjunctive mood.
    Subjunctive,
    /// The imperative mood.
    Imperative,
    /// The infinitive.
    Infinitive,
}

/// Conjugation group of a verb.
///
/// Within this module the group is derived from the ending of the present
/// active infinitive (see [`group`]).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Group {
    /// Present active infinitive ends with `āre`.
    First,
    /// Present active infinitive ends with `ēre`.
    Second,
    /// Present active infinitive ends with `ere`.
    Third,
    /// Present active infinitive ends with `īre`.
    Fourth,
}

/// Grammatical person selector passed to [`Verb::conjugate`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Person {
    /// The first person.
    First,
    /// The second person.
    Second,
    /// The third person.
    Third,
}

impl<'a> Conjugation<'a> {
    /// Returns the adjective held by a [`Conjugation::PassivePerfect`], or
    /// `None` for a [`Conjugation::Complete`].
    pub fn adjective(&'a self) -> Option<&'a A::Regular> {
        match self {
            Conjugation::PassivePerfect(adjective, _) => Some(adjective),
            Conjugation::Complete(_) => None,
        }
    }

    /// Returns the verb form held by a [`Conjugation::PassivePerfect`], or
    /// `None` for a [`Conjugation::Complete`].
    pub fn verb(&'a self) -> Option<&'a str> {
        match self {
            Conjugation::PassivePerfect(_, verb) => Some(verb),
            Conjugation::Complete(_) => None,
        }
    }

    /// Declines the adjective of a [`Conjugation::PassivePerfect`] with the
    /// given [`Number`], [`A::Case`], and [`A::Gender`], and returns every
    /// declined form followed by a space and the verb form.
    ///
    /// Returns `None` if the conjugation is a [`Conjugation::Complete`] or if
    /// the adjective itself returns `None` for the requested form.
    pub fn decline(&self, number: Number, case: A::Case, gender: A::Gender) -> Option<Vec<String>> {
        match self {
            Conjugation::PassivePerfect(adjective, verb) => {
                // Propagate `None` when the adjective has nothing to offer
                // for this number, case, and gender.
                let declensions = adjective.decline(number, case, gender)?;

                Some(
                    declensions
                        .into_iter()
                        .map(|declension| format!("{} {}", declension, verb))
                        .collect(),
                )
            },
            Conjugation::Complete(_) => None,
        }
    }
}

/// Determines the conjugation group from the ending of a present active
/// infinitive. The input is compared as is; no Unicode normalization is
/// performed here.
///
/// Returns `None` when the infinitive ends with none of the known endings.
fn not_normalized_group(present_active_infinitive: &str) -> Option<Group> {
    // Endings are checked in this order; the first one that matches wins.
    const ENDINGS: [(&str, Group); 4] = [
        ("āre", Group::First),
        ("ēre", Group::Second),
        ("ere", Group::Third),
        ("īre", Group::Fourth),
    ];

    ENDINGS
        .iter()
        .find(|(ending, _)| present_active_infinitive.ends_with(*ending))
        .map(|&(_, group)| group)
}

/// Returns the present stem of a present active infinitive by removing its
/// infinitive ending. The input is compared as is; no Unicode normalization
/// is performed here.
///
/// Returns `None` when the infinitive ends with none of the known endings.
///
/// The ending is removed exactly once, so a stem that itself ends with the
/// same letters as the ending is left intact.
fn not_normalized_present_stem(present_active_infinitive: &str) -> Option<&str> {
    // Same endings, in the same order, that `not_normalized_group` recognizes.
    const INFINITIVE_ENDINGS: [&str; 4] = ["āre", "ēre", "ere", "īre"];

    INFINITIVE_ENDINGS
        .iter()
        .find_map(|&ending| present_active_infinitive.strip_suffix(ending))
}

/// Returns the perfect stem of a first person singular perfect active
/// indicative form by removing its final `ī`. The input is compared as is; no
/// Unicode normalization is performed here.
///
/// Returns `None` when the form doesn't end with `ī`. The ending is removed
/// exactly once.
fn not_normalized_perfect_stem(perfect_active_indicative: &str) -> Option<&str> {
    perfect_active_indicative.strip_suffix("ī")
}

/// Returns the stem of a perfect passive participle by removing a final `um`,
/// `us`, or `a`, tried in that order. The input is compared as is; no Unicode
/// normalization is performed here.
///
/// Returns `None` when the participle has none of those endings. The ending
/// is removed exactly once.
fn not_normalized_participle_stem(perfect_passive_participle: &str) -> Option<&str> {
    perfect_passive_participle
        .strip_suffix("um")
        .or_else(|| perfect_passive_participle.strip_suffix("us"))
        .or_else(|| perfect_passive_participle.strip_suffix('a'))
}

/// Determines the conjugation [`Group`] of a verb from its present active
/// infinitive.
///
/// The infinitive is passed through `normalize_if_needed` before its ending
/// is inspected. Returns `None` when the ending isn't recognized.
pub fn group(present_active_infinitive: &str) -> Option<Group> {
    let normalized = U::normalize_if_needed(present_active_infinitive);
    let infinitive: &str = normalized.as_ref();

    not_normalized_group(infinitive)
}

/// Returns the present stem of a verb as an owned `String`, given its present
/// active infinitive.
///
/// The infinitive is passed through `normalize_if_needed` before its ending
/// is removed. Returns `None` when the infinitive doesn't have a recognized
/// ending.
pub fn present_stem(present_active_infinitive: &str) -> Option<String> {
    let present_active_infinitive = U::normalize_if_needed(present_active_infinitive);

    not_normalized_present_stem(present_active_infinitive.as_ref()).map(str::to_string)
}

/// Returns the perfect stem of a verb as an owned `String`, given its first
/// person singular perfect active indicative form.
///
/// The form is passed through `normalize_if_needed` before its ending is
/// removed. Returns `None` when the form doesn't end with `ī`.
pub fn perfect_stem(perfect_active_indicative: &str) -> Option<String> {
    let perfect_active_indicative = U::normalize_if_needed(perfect_active_indicative);

    not_normalized_perfect_stem(perfect_active_indicative.as_ref()).map(str::to_string)
}

/// Decides, while conjugated forms are being composed, whether a fragment
/// that ends with a long vowel must have that vowel shortened.
///
/// `next` is the fragment that follows the one ending with the long vowel and
/// `is_last` states whether `next` is the final fragment of the form.
fn should_remove_last_macron(next: &str, is_last: bool) -> bool {
    // A long vowel is shortened in front of "nt" and "nd".
    if next.starts_with("nt") || next.starts_with("nd") {
        return true;
    }

    // A long vowel is shortened in front of another vowel.
    let starts_with_vowel = match UnicodeSegmentation::graphemes(next, true).next() {
        Some(first) => D::is_vowel(first),
        None => false,
    };

    if starts_with_vowel {
        return true;
    }

    // A long vowel is shortened in front of a final t, m, or r.
    is_last && (next == "t" || next == "m" || next == "r")
}

/// Reports whether the given string reference ends with one of the long
/// vowels ā, ē, ī, ō, or ū.
fn ends_with_long_vowel(character: &str) -> bool {
    const LONG_VOWELS: [&str; 5] = ["ā", "ē", "ī", "ō", "ū"];

    LONG_VOWELS
        .iter()
        .any(|&long_vowel| character.ends_with(long_vowel))
}

/// Pushes the first element of `source` onto `destination`.
///
/// If that element ends with a long vowel, the element that follows it in
/// `source` (if any) decides whether the vowel is shortened before it is
/// pushed. The following element counts as the last fragment when `source`
/// holds exactly two elements; elements past the second are only used to
/// tell that the second one isn't the last.
///
/// Nothing happens when `source` is empty.
fn push_fragment(destination: &mut String, source: &[&str]) {
    let (to_push, following) = match source.split_first() {
        Some(parts) => parts,
        None => return,
    };

    // Only a fragment that ends with a long vowel and is followed by another
    // fragment is a candidate for shortening.
    let shorten = ends_with_long_vowel(to_push)
        && following
            .first()
            .map_or(false, |next| should_remove_last_macron(next, following.len() == 1));

    if shorten {
        // Decompose the fragment so the macron of the final vowel becomes a
        // separate trailing scalar (\u{0304}), drop exactly that scalar, and
        // recompose. The number of decomposed scalars is unrelated to the
        // fragment's byte length, so the scalar is popped rather than
        // counted off.
        let mut decomposed: String = to_push.nfd().collect();
        decomposed.pop();

        destination.extend(decomposed.as_str().nfc());
    } else {
        destination.push_str(to_push);
    }
}

/// Constructing verb conjugations is quite a bit more complex, especially from
/// a computing standpoint, than declining nouns and adjectives. Verb 
/// conjugations can me made up of different pieces, which I'm referring to as
/// fragments in this library. 
/// 
/// For example, let's say you want to create the third person plural imperfect
/// active indicative form of laudāre. Since you're constructing the third 
/// person plural active form, the personal ending is nt. Since you're 
/// constructing the imperfect form, the tense fragment bā must be inserted.
/// But any long vowel followed by nt must be shortened so you need to insert
/// ba rather than bā. In the end you want laudābant.
/// 
/// To simply the process of going from the fragments provided by verb::endings
/// this process takes a stem and those fragments and turns them into a String.
/// 
/// # Warning
/// 
/// While the rules used to dermined if vowels should be shortened or not are 
/// fairly regular, they're not entirely regular. As far as I know, they should
/// be regular when creating conjugated forms of verbs from regular noun 
/// fragments, which is why this function is part of the verb module rather
/// than accessible outside of it. 
fn conjugate_from_fragments(stem: &str, fragments: &[&str]) -> String {
    let mut composed = String::from(stem);

    // A window of length three is needed because determining if an ending
    // vowel should be short requires knowing the fragment that comes after
    // it as well as whether the fragment that comes after it is the last
    // fragment in the chunk. 
    for fragment_chunks in fragments.windows(3) { 
        push_fragment(&mut composed, fragment_chunks);
    }

    // The previous for loop will miss the last two elements. Grab them by 
    // taking the first two elements from a reversed iterator. 
    let mut chunks = fragments.iter().rev().take(2);
    let last = chunks.next();
    let penultimate = chunks.next();

    // If there are two elements, push the penultimate one to composed.
    if let (Some(penultimate), Some(last)) = (penultimate, last) {
        push_fragment(&mut composed, &[penultimate, last]);
    }

    // Push the last element to composed.
    if let Some(last) = last {
        push_fragment(&mut composed, &[last]);
    }

    composed
}

// pub fn participle_stem<'a>(perfect_passive_participle: &'a str) -> Option<&'a str> {
//     let perfect_passive_participle = U::normalize_if_needed(perfect_passive_participle);

//     not_normalized_participle_stem(perfect_passive_participle.as_ref())
// }


// pub fn conjugation_to_string(conjugation: Conjugation) -> Option<Vec<String>> {
//     match conjugation {
//         Conjugation::Complete(conjugation) => Some(conjugation),
//         Conjugation::PassivePerfect(_, _) => None,
//     }
// }

#[cfg(test)]
mod test {
    use super::*;
    use crate::adjective as A;

    use unicode_normalization::{is_nfc};

    /// Asserts that a long vowel is shortened before `next` when `next` is
    /// the last fragment, but not when it isn't.
    fn assert_shortens_only_when_last(next: &str) {
        assert!(should_remove_last_macron(next, true), "should_remove_last_macron reported false when given '{}' as next and true as is_last.", next);
        assert_eq!(should_remove_last_macron(next, false), false, "should_remove_last_macron reported true when given '{}' as next and false as is_last.", next);
    }

    /// Asserts that a long vowel is shortened before `next` regardless of
    /// whether `next` is the last fragment.
    fn assert_always_shortens(next: &str) {
        for &is_last in [true, false].iter() {
            assert!(should_remove_last_macron(next, is_last), "should_remove_last_macron reported false when given '{}' as next and {} as is_last.", next, is_last);
        }
    }

    #[test]
    fn test_passive_perfect_to_string() {
        // Which adjective is used doesn't matter, so the perfect passive
        // participle of laudāre serves as the adjective here.
        let dictionary_form = A::DictionaryForm::Three("laudātus".to_string(), "laudāta".to_string(), "laudātum".to_string());
        let adjective = match A::Regular::new(dictionary_form) {
            Ok(adjective) => adjective,
            Err(error) => panic!("Failed to create an adjective with which to test `passive_perfect_to_string`. Received the following error: {}", error),
        };
        let passive_perfect = Conjugation::PassivePerfect(&adjective, "sum");

        // This isn't an exhaustive test. It only makes sure that the output
        // is in NFC form and that one returned value matches the expected
        // value.
        let conjugations = match passive_perfect.decline(Number::Singular, A::Case::Nominative, A::Gender::Masculine) {
            Some(conjugations) => conjugations,
            None => panic!("Received `None` when testing `passive_perfect_to_string`."),
        };

        // Only the first returned form is inspected.
        let conjugation = match conjugations.first() {
            Some(conjugation) => conjugation,
            None => panic!("Received `None` from `passive_perfect_to_string. Should have received `Some`."),
        };
        let expected = "laudātus sum";

        assert!(is_nfc(conjugation), "Conjugation received from `passive_perfect_to_string` was not in NFC form.");
        assert_eq!(conjugation, expected, "Received incorrect value from `passive_perfect_to_string`. Received {}, should've been {}.", conjugation, expected);
    }

    #[test]
    fn test_short_vowel_after_nt() {
        assert_always_shortens("nt");
    }

    #[test]
    fn test_short_vowel_after_nd() {
        assert_always_shortens("nd");
    }

    #[test]
    fn test_short_vowel_when_next_starts_with_vowel() {
        // should_remove_last_macron has to return true whenever next starts
        // with a vowel, short or long, whether or not next is the last
        // fragment.
        let vowels = ["a", "ā", "e", "ē", "i", "ī", "o", "ō", "u", "ū"];

        for vowel in vowels.iter() {
            let next = format!("{}xxxx", vowel);

            for &is_last in [true, false].iter() {
                assert!(should_remove_last_macron(&next, is_last), "should_remove_last_macron reported false when next starts with '{}' and is_last is {}.", vowel, is_last);
            }
        }
    }

    #[test]
    fn test_short_vowel_after_t() {
        assert_shortens_only_when_last("t");
    }

    #[test]
    fn test_short_vowel_after_m() {
        assert_shortens_only_when_last("m");
    }

    #[test]
    fn test_short_vowel_after_r() {
        assert_shortens_only_when_last("r");
    }
}