1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#![doc(html_root_url = "https://docs.rs/igpay-atinlay/0.1.0")]

//! Text to [Pig Latin](https://en.wikipedia.org/wiki/Pig_Latin) conversion.
//!
//! This crate began as a solution to [Exercise
//! 8.2](https://doc.rust-lang.org/book/ch08-03-hash-maps.html#summary) in [The Rust Programming
//! Language](https://doc.rust-lang.org/book/). The innocuous-looking phrase "Keep in mind the
//! details about UTF-8 encoding!" in that exercise conceals a world of pain.
//!
//! At the end of the day, Pig Latin is not a well-defined dialect. This program will produce
//! questionable results for complicated cases and non-Romance languages (for which the
//! underlying crate has no idea of extra vowels). It will probably produce nonsense answers on
//! "exotic" languages.
//!
//! # Authors
//!
//! * Reddit [/u/AlexRodger](https://reddit.com/u/AlexRodger) wrote the [original
//!   code](https://www.reddit.com/r/learnrust/comments/mo5lvd/rate_and_critic_my_solution_to_exercise_2_in/).
//!
//! * Reddit [/u/hjd_thd](https://reddit.com/u/hjd_thd) rewrote to get an [FP
//!   solution](https://www.reddit.com/r/learnrust/comments/mo5lvd/rate_and_critic_my_solution_to_exercise_2_in/gu1w5s6).
//!
//! * Reddit [/u/po8](https://reddit.com/u/po8) rewrote again into a more production-grade
//!   version. The rewrite ended up being pretty from-scratch.

use is_vowel::IsRomanceVowel;

use regex::Regex;

/// Transformer from text to Pig Latin.
///
/// Construct one with [`IgpayAtinlay::new`]. The value keeps the compiled word-matching
/// pattern and the configured vowel suffix so that both are reused on every call.
#[derive(Debug, Clone)]
pub struct IgpayAtinlay {
    /// Compiled pattern used to pick out the words of a text that are to be transformed.
    re: Regex,
    /// String inserted before the final "ay" when a word begins with a vowel.
    vowel_suffix: String,
}

impl IgpayAtinlay {
    /// Create a new Pig Latin translator.
    ///
    /// `vowel_suffix` is the string appended before "ay" after words beginning with a vowel:
    /// this is usually "w" or "h", but also might be "j", "" or any of a number of other
    /// things depending on the Pig Latin "dialect".
    ///
    /// Iff `split_hyphens` is `true`, treat hyphens as word boundaries.
    ///
    /// # Panics
    ///
    /// Panics only if the built-in word pattern fails to compile, which would be a bug in
    /// this crate rather than a consequence of the arguments.
    pub fn new(vowel_suffix: &str, split_hyphens: bool) -> Self {
        // A hyphen counts as a word-internal character only when hyphenated compounds are
        // kept whole; otherwise it is left out of the character class and so ends a word.
        let hyphen = if split_hyphens { "" } else { "-" };
        // A word is an alphabetic character, optionally followed by a run of alphabetic,
        // joiner, mark, connector or apostrophe (and maybe hyphen) characters that itself
        // ends in an alphabetic character.
        //
        // XXX Should perhaps have other connecting punctuation not covered by the Unicode
        // tables here?
        let word_re = format!(
            r"(?x)
                \p{{Alphabetic}} (
                    ( \p{{Alphabetic}}
                      | \p{{Join_Control}}
                      | \p{{Mark}}
                      | \p{{Connector_Punctuation}}
                      | [{}'’]
                    )*
                  \p{{Alphabetic}} )?
            ",
            hyphen
        );
        let re = Regex::new(&word_re).expect("built-in word pattern is a valid regex");
        Self {
            vowel_suffix: vowel_suffix.to_string(),
            re,
        }
    }

    /// Transform `word` to [Pig Latin](https://en.wikipedia.org/wiki/Pig_Latin). Word is
    /// assumed to be in a [Romance language](https://en.wikipedia.org/wiki/Romance_languages):
    /// see [`IsRomanceVowel::is_romance_vowel`] for the definition of "vowel" used here.
    ///
    /// Leading non-alphabetic characters are stripped; everything after the first
    /// alphabetic character (including trailing punctuation) is preserved. The rules are:
    ///
    /// * A word starting with a vowel is kept as-is and gets the vowel suffix plus "ay".
    /// * Otherwise the first letter is moved to the end, followed by "ay". If that letter
    ///   was uppercase it is lowercased and the new first character is uppercased instead.
    /// * A `word` containing no alphabetic character at all is returned unchanged.
    ///
    /// # Examples
    ///
    ///     # use igpay_atinlay::IgpayAtinlay;
    ///     let pig = IgpayAtinlay::new("h", false);
    ///     assert_eq!(pig.word_to_pig_latin("Aye"), "Ayehay");
    ///     let pig = IgpayAtinlay::new("", false);
    ///     assert_eq!(pig.word_to_pig_latin("Aye"), "Ayeay");
    ///     let pig = IgpayAtinlay::new("w", false);
    ///     assert_eq!(pig.word_to_pig_latin("argle-bargle"), "argle-bargleway");
    ///     let pig = IgpayAtinlay::new("w", false);
    ///     assert_eq!(pig.word_to_pig_latin("ding-dong"), "ing-dongday");
    ///     assert_eq!(pig.word_to_pig_latin("*ding-dong*"), "ing-dong*day");
    ///     assert_eq!(pig.word_to_pig_latin("*aye*"), "aye*way");
    pub fn word_to_pig_latin(&self, word: &str) -> String {
        // Strip the non-alphabetic prefix once, so that every branch below (including the
        // vowel branch) works from the same trimmed slice.
        let stripped = word.trim_start_matches(|c: char| !c.is_alphabetic());
        let mut tail = stripped.chars();
        let first = match tail.next() {
            Some(first) => first,
            // Nothing alphabetic to work with: hand the input back untouched.
            None => return word.to_string(),
        };

        // Room for the word, the suffix and the trailing "ay"; case conversion may change
        // byte lengths slightly, so this is only a hint.
        let mut result = String::with_capacity(stripped.len() + self.vowel_suffix.len() + 2);
        if first.is_romance_vowel() {
            result.push_str(stripped);
            result.push_str(&self.vowel_suffix);
        } else if first.is_uppercase() {
            // Keep the word capitalized: the new leading character takes over the
            // uppercase, and the moved letter is lowercased.
            if let Some(second) = tail.next() {
                result.extend(second.to_uppercase());
            }
            result.push_str(tail.as_str());
            result.extend(first.to_lowercase());
        } else {
            result.push_str(tail.as_str());
            result.push(first);
        }
        result.push_str("ay");
        result
    }

    /// Map alphabetic words in `text` using the `word_processor` mapping function.
    ///
    /// Every match of the word pattern is replaced by whatever `word_processor` returns for
    /// it; text between matches is copied through unchanged.
    fn map_words<F>(&self, text: &str, mut word_processor: F) -> String
    where
        F: FnMut(&str) -> String,
    {
        self.re
            // Group 0 is the whole match and always exists, so indexing cannot panic.
            .replace_all(text, |caps: &regex::Captures| word_processor(&caps[0]))
            // Reuse the buffer when the replacement already produced an owned string.
            .into_owned()
    }

    /// Transform `text` to Pig Latin
    ///
    /// Each word found in `text` is converted with
    /// [`word_to_pig_latin`](IgpayAtinlay::word_to_pig_latin); whitespace and punctuation
    /// between words are left in place.
    ///
    /// # Examples
    ///
    ///     # use igpay_atinlay::IgpayAtinlay;
    ///     let pig = IgpayAtinlay::new("h", false);
    ///     let pigtext=pig.text_to_pig_latin("Can't touch this! Awoo-away!");
    ///     assert_eq!(pigtext, "An'tcay ouchtay histay! Awoo-awayhay!");
    pub fn text_to_pig_latin(&self, text: &str) -> String {
        self.map_words(text, |w| self.word_to_pig_latin(w))
    }
}