1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

/// Reformat a string of Hebrew/Aramaic from the QWB/QD database in order to
/// maintain a consistent display format.
///
/// Two substitutions are applied in a single pass over the input:
///
/// * ASCII angle brackets `<` and `>` are always replaced with `〈` and `〉`.
/// * The maqqef (U+05BE) is replaced with `○`, but only when `genre_id` is
///   1 or 2 (the Qumran genres).
///
/// Every other character is copied through unchanged.
///
/// # Arguments
///
/// * `word` - The word to normalize
/// * `genre_id` - The genre of the text being normalized (see the
///   `manuscript_genre` value in the `manuscript` table)
///
/// # Returns
///
/// A newly allocated `String` containing the normalized text.
pub fn normalize_string(word: &str, genre_id: u8) -> String {
    // Genre 1 is Qumran non-biblical, 2 is Qumran biblical, and 3 is biblical.
    // Only the Qumran texts get the maqqef replaced with a circle; genre 3 and
    // any other id keep the maqqef as-is.
    let replace_maqqef = matches!(genre_id, 1 | 2);

    // Each substitution is char-for-char, so one mapping pass yields the same
    // result as chained `replace` calls without the intermediate strings.
    word.chars()
        .map(|c| match c {
            '<' => '〈',
            '>' => '〉',
            '\u{05BE}' if replace_maqqef => '○',
            other => other,
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The maqqef becomes a circle for genres 1 and 2, and is left alone for
    /// genre 3.
    #[test]
    fn replace_maqqef_for_proper_genre() {
        let source = "את־הארץ";
        let with_circle = "את○הארץ";

        // Genres 1 and 2: maqqef is swapped out.
        for qumran_genre in [1_u8, 2_u8] {
            assert_eq!(normalize_string(source, qumran_genre), with_circle);
        }

        // Genre 3: the input comes back untouched.
        assert_eq!(normalize_string(source, 3_u8), source);
    }

    /// ASCII angle brackets are converted for genres 1, 2 and 3 alike.
    #[test]
    fn replace_angled_brackets() {
        let source = "את ה<א>רץ";
        let with_wide_brackets = "את ה〈א〉רץ";

        for genre in [1_u8, 2_u8, 3_u8] {
            assert_eq!(normalize_string(source, genre), with_wide_brackets);
        }
    }
}