iasthk/
lib.rs

1pub mod converter {
2    use unicode_normalization::UnicodeNormalization;
3    fn ascii_to_unicode<T: Into<String>>(input: T) -> String {
4        let mut output: String = input.into();
5        output = output
6            .replace('/', "\u{0301}")
7            .replace('\\', "\u{0300}")
8            .replace('A', "a\u{0304}")
9            .replace('I', "i\u{0304}")
10            .replace('U', "u\u{0304}")
11            .replace("lRR", "ḷ\u{0304}")
12            .replace("lR", "ḷ")
13            .replace("RR", "ṛ\u{0304}")
14            .replace('R', "ṛ")
15            .replace('M', "ṃ")
16            .replace('H', "ḥ")
17            .replace('G', "ṅ")
18            .replace('J', "ñ")
19            .replace('T', "ṭ")
20            .replace('D', "ḍ")
21            .replace('N', "ṇ")
22            .replace('z', "ś")
23            .replace('S', "ṣ")
24            .replace("||", "\u{0965}")
25            .replace('|', "\u{0964}");
26        output
27    }
28    fn normalize_unicode<T: Into<String>>(input: T) -> String {
29        let input: &str = &input.into();
30        input.nfkc().collect::<String>()
31    }
32    pub fn convert<T: Into<String>>(input: T) -> String {
33        let mut output = input.into();
34        output = ascii_to_unicode(output);
35        output = normalize_unicode(output);
36
37        output
38    }
39    #[cfg(test)]
40    mod test {
41        use super::*;
42        #[test]
43        fn test_convert() {
44            let string = String::from("A");
45            let result = convert(string);
46            assert_eq!(result, normalize_unicode("ā".to_string()));
47            let string = String::from("asti nRpo");
48            let result = convert(string);
49            assert_eq!(result, "asti nṛpo".to_string());
50            let string = String::from("RR");
51            let result = convert(string);
52            assert_eq!(result, normalize_unicode("ṝ".to_string()));
53        }
54        #[test]
55        fn unicode_normalized() {
56            let input = String::from("a/sti");
57            let output = normalize_unicode("ásti");
58            let result = convert(input);
59            assert_eq!(result, output);
60        }
61    }
62}
63
64/// Validation module for Harvard-Kyoto texts
65pub mod validator {
66    use regex::Regex;
67    use std::fmt;
68
69    #[derive(Debug)]
70    pub enum ValidationError {
71        NotASCII(Vec<char>),
72        InvalidChars(Vec<char>),
73        InvalidDiacriticOrder(Vec<String>),
74    }
75    impl fmt::Display for ValidationError {
76        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
77            match self {
78                ValidationError::NotASCII(a) => write!(f, "Non ASCII chars {:?}", a),
79                ValidationError::InvalidChars(a) => write!(f, "Invalid characteres {:?}", a),
80                ValidationError::InvalidDiacriticOrder(a) => {
81                    write!(f, "Invalid diacritic order: {:?}", a)
82                }
83            }
84        }
85    }
86
87    fn diacritics_ordered<T: Into<String>>(input: T) -> Result<(), ValidationError> {
88        let input: String = input.into();
89        let re = Regex::new(r"[bcdghjklmprstvzGHJLMS][/\\=]").unwrap();
90
91        let matches: Vec<regex::Match> = re.find_iter(&input).collect();
92        match matches.len() {
93            0 => Ok(()),
94            _ => {
95                let v: Vec<String> = matches
96                    .into_iter()
97                    .map(|m| m.as_str().to_string())
98                    .collect();
99                Err(ValidationError::InvalidDiacriticOrder(v))
100            }
101        }
102    }
103
104    fn standard_characteres<T: Into<String>>(input: T) -> Result<(), ValidationError> {
105        let input: String = input.into();
106        let valid_chars = vec![
107            'a', 'b', 'c', 'd', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's',
108            't', 'u', 'v', 'z', 'A', 'G', 'H', 'I', 'J', 'L', 'M', 'N', 'R', 'S', 'U', '/', '\\',
109            '\n', '-', '|', ' ',
110        ];
111        match input.chars().all(|c| valid_chars.contains(&c)) {
112            true => Ok(()),
113            false => {
114                let mut invalid_chars: Vec<char> =
115                    input.chars().filter(|c| !valid_chars.contains(c)).collect();
116                invalid_chars.dedup();
117                Err(ValidationError::InvalidChars(invalid_chars))
118            }
119        }
120    }
121
122    pub fn validate<T: Into<String>>(input: T) -> Result<(), ValidationError> {
123        let input: String = input.into();
124
125        check_ascii(&input)?;
126        diacritics_ordered(&input)?;
127        standard_characteres(input)?;
128        Ok(())
129    }
130
131    fn check_ascii<T: Into<String>>(input: T) -> Result<(), ValidationError> {
132        let input: String = input.into();
133
134        if !input.is_ascii() {
135            let mut non_ascii_chars: Vec<char> = input.chars().filter(|c| !c.is_ascii()).collect();
136            non_ascii_chars.dedup();
137            Err(ValidationError::NotASCII(non_ascii_chars))
138        } else {
139            Ok(())
140        }
141    }
142
143    #[cfg(test)]
144    mod test {
145        use crate::validator;
146        #[test]
147        fn validation() {
148            assert!(validator::validate("agnimiLepurohitaM").is_ok());
149            assert!(validator::validate("ab=").is_err());
150            assert!(validator::validate("af=").is_err());
151        }
152    }
153}