okkhor/
parser.rs

1use crate::{
2    models::{Match, Match::*, MatchType::*, Pattern},
3    patterns::PHONETIC_PATTERNS,
4};
5use std::collections::BTreeMap;
6
/// Lowercases `c` unless it is a letter whose case must be kept.
///
/// The letters `o i u d g j n r s t y z` are returned exactly as given, in
/// whichever case they arrived. Every other character goes through
/// [`char::to_ascii_lowercase`], which only changes ASCII `A`-`Z`.
fn conditional_lowercase(c: char) -> char {
    let folded = c.to_ascii_lowercase();
    match folded {
        // Case is preserved for these letters, so hand back the caller's form.
        'o' | 'i' | 'u' | 'd' | 'g' | 'j' | 'n' | 'r' | 's' | 't' | 'y' | 'z' => c,
        _ => folded,
    }
}
16
/// Reports whether `c` is one of the ASCII vowels `a`, `e`, `i`, `o`, `u`, in either case.
fn is_vowel(c: char) -> bool {
    matches!(c.to_ascii_lowercase(), 'a' | 'e' | 'i' | 'o' | 'u')
}
23
24fn is_consonant(c: char) -> bool {
25    !is_vowel(c) && c.is_ascii_alphabetic()
26}
27
/// Reports whether `c` is anything other than an ASCII letter.
///
/// Digits, whitespace and non-ASCII characters therefore all count as
/// punctuation here.
fn is_punctuation(c: char) -> bool {
    !matches!(c, 'a'..='z' | 'A'..='Z')
}
31
32fn does_match(_match: &Match, prefix: char, suffix: char) -> bool {
33    match _match {
34        PrefixIs(Vowel) => is_vowel(prefix),
35        PrefixIsNot(Vowel) => !is_vowel(prefix),
36        PrefixIs(Consonant) => is_consonant(prefix),
37        PrefixIsNot(Consonant) => !is_consonant(prefix),
38        PrefixIs(Punctuation) => is_punctuation(prefix),
39        PrefixIsNot(Punctuation) => !is_punctuation(prefix),
40        PrefixIs(Number) => prefix.is_ascii_digit(),
41        PrefixIsNot(Number) => !prefix.is_ascii_digit(),
42        PrefixIs(Char(c)) => (*c == prefix),
43        PrefixIsNot(Char(c)) => (*c != prefix),
44        SuffixIs(Vowel) => is_vowel(suffix),
45        SuffixIsNot(Vowel) => !is_vowel(suffix),
46        SuffixIs(Consonant) => is_consonant(suffix),
47        SuffixIsNot(Consonant) => !is_consonant(suffix),
48        SuffixIs(Punctuation) => is_punctuation(suffix),
49        SuffixIsNot(Punctuation) => !is_punctuation(suffix),
50        SuffixIs(Number) => suffix.is_ascii_digit(),
51        SuffixIsNot(Number) => !suffix.is_ascii_digit(),
52        SuffixIs(Char(c)) => (*c == suffix),
53        SuffixIsNot(Char(c)) => (*c != suffix),
54    }
55}
56
57pub struct Parser {
58    patterns: BTreeMap<&'static str, &'static Pattern>,
59}
60
61impl Parser {
62    pub fn new_phonetic() -> Parser {
63        Self::new(PHONETIC_PATTERNS)
64    }
65
66    pub(crate) fn new(patterns_input: &'static [Pattern]) -> Parser {
67        let patterns = patterns_input
68            .iter()
69            .map(|p| (p.find, p))
70            .collect::<BTreeMap<_, _>>();
71        Parser { patterns }
72    }
73
74    pub fn convert(&self, raw_input: &str) -> String {
75        let mut output = String::with_capacity(64);
76        self.convert_into(raw_input, &mut output);
77        output
78    }
79
80    pub fn convert_into(&self, raw_input: &str, output: &mut String) {
81        let input: String = raw_input.chars().map(conditional_lowercase).collect();
82
83        let mut prefix = ' ';
84        let mut input = &input[0..];
85
86        output.clear();
87        while !input.is_empty() {
88            match self.find_pattern(input) {
89                Some(pattern) => {
90                    output.push_str(pattern.get_replacement(input, prefix));
91                    prefix = pattern.find.chars().last().unwrap();
92                    input = &input[pattern.find.len()..];
93                }
94                None => {
95                    prefix = input.chars().next().unwrap();
96                    output.push(prefix);
97                    input = &input[1..];
98                }
99            }
100        }
101    }
102
103    pub(crate) fn find_pattern(&self, input: &str) -> Option<&Pattern> {
104        self.patterns
105            .range(..=input)
106            .rfind(|(&k, _)| input.starts_with(k))
107            .map(|(_, &p)| p)
108    }
109}
110
111impl Pattern {
112    pub(crate) fn get_replacement(&self, input: &str, prefix: char) -> &str {
113        if self.rules.is_empty() {
114            self.default_replacement
115        } else {
116            let suffix = input.chars().nth(self.find.len()).unwrap_or(' ');
117
118            let matched_rule = self.rules.iter().find(|rule| {
119                rule.when_matches
120                    .iter()
121                    .all(|m| does_match(m, prefix, suffix))
122            });
123
124            match matched_rule {
125                Some(rule) => rule.replace_with,
126                None => self.default_replacement,
127            }
128        }
129    }
130}