// gnt_tools/lib.rs
//! This crate is a toolbox for studying the Greek New Testament.
//! The crate is provided AS-IS.
//! # Examples
//! 
//! ```
//! use gnt_tools::core_text;
//!
//! let s = "16 Εἶπεν δὲ παραβολὴν πρὸς αὐτοὺς λέγων·
//!          ἀνθρώπου τινὸς πλουσίου εὐφόρησεν ἡ χώρα. 17 
//!          καὶ διελογίζετο ἐν ἑαυτῷ λέγων· τί ποιήσω, ὅτι 
//!          οὐκ ἔχω ποῦ συνάξω τοὺς καρπούς μου; ";
//!
//! let s2 = "ειπενδεπαραβοληνπροϲαυτουϲλεγωνανθρωπουτ\
//!           ινοϲπλουϲιουευφορηϲενηχωρακαιδιελογιζετοενεαυτω\
//!           λεγωντιποιηϲωοτιουκεχωπουϲυναξωτουϲκαρπουϲμου";
//!
//! assert_eq!(core_text(String::from(s)), String::from(s2));
//! ```

use unicode_normalization::UnicodeNormalization;

/*
   Even if one has developed a greedy function (cf. below),
   this format function may still be useful to detect
   an uncommon character in the Greek text (by comparing its
   result with the greedy result).
*/
/// Returns the core text of a Greek New Testament critical edition.
///
/// The result contains only lowercase Greek letters: diacritics, digits,
/// punctuation, whitespace and every other non-Greek character are removed,
/// uppercase letters are lowercased and every sigma is written as the lunate
/// sigma (ϲ). It might be useful for comparing Greek New Testament critical
/// editions by getting their "core" differences/concordances.
///
/// Notes on this function:
/// - it does not replace nomina sacra (e.g., κϲ) by their non-abbreviated form
///   (resp. κυριοϲ), nor words (e.g., κύριος) by their nomen sacrum form (when
///   such a form exists) (resp. κϲ).
/// - it deletes any character used to encode nomina sacra (e.g., '|', or '('
///   and ')'), hence `|κς|` will give κϲ.
/// - an "invisible nu" written as "ˉ" is replaced by a real ν.
///
/// # Examples
///
/// ```
/// use gnt_tools::core_text;
///
/// let s = "16 Εἶπεν δὲ παραβολὴν πρὸς αὐτοὺς λέγων·
///          ἀνθρώπου τινὸς πλουσίου εὐφόρησεν ἡ χώρα. 17 
///          καὶ διελογίζετο ἐν ἑαυτῷ λέγων· τί ποιήσω, ὅτι 
///          οὐκ ἔχω ποῦ συνάξω τοὺς καρπούς μου; ";
///
/// let s2 = "ειπενδεπαραβοληνπροϲαυτουϲλεγωνανθρωπουτ\
///           ινοϲπλουϲιουευφορηϲενηχωρακαιδιελογιζετοενεαυτω\
///           λεγωντιποιηϲωοτιουκεχωπουϲυναξωτουϲκαρπουϲμου";
///
/// assert_eq!(core_text(String::from(s)), String::from(s2));
/// ```
pub fn core_text(s: String) -> String {
    // Canonical decomposition (NFD) splits each accented letter into its base
    // letter followed by combining marks. Doing it before the greedy filter
    // keeps the base letter; otherwise the whole precomposed (accented)
    // character would be discarded as "not a plain Greek letter".
    //
    // The combining marks U+0300..=U+036F (bounds included) are dropped while
    // streaming the decomposition, so no intermediate string is built.
    let without_diacritics: String = s
        .nfd()
        .filter(|c| !('\u{0300}'..='\u{036f}').contains(c))
        .collect();

    // Normalize case, sigmas and the "invisible nu", then keep Greek letters only.
    greedy_format(&replace(without_diacritics))
}

/// Normalizes the letters of `s` before the greedy filtering step.
///
/// Three rewrites are applied, in this order:
/// 1. every "invisible nu" mark ("ˉ") becomes a real nu ("ν");
/// 2. the whole text is lowercased;
/// 3. both sigma forms (σ and ς) become the lunate sigma ("ϲ").
fn replace(s: String) -> String {
    // Expand the "invisible nu" first; the inserted nu is already lowercase.
    let with_real_nu = s.replace("ˉ", "ν");

    // Lowercasing may produce either σ or ς from an uppercase sigma; both are
    // unified by the substitution below.
    let lowered = with_real_nu.to_lowercase();

    lowered.replace(|c: char| matches!(c, 'σ' | 'ς'), "ϲ")
}

/// Keeps only the lowercase Greek letters of `s` and drops everything else.
///
/// A character is kept when it lies in `'\u{03B1}'..='\u{03C9}'` (α to ω, a
/// range that also contains σ and ς) or when it is the lunate sigma
/// `'\u{03F2}'`. Uppercase letters, accented precomposed letters, digits,
/// punctuation and whitespace are all removed.
///
/// Returns a newly allocated `String`; `s` is only borrowed.
fn greedy_format(s: &str) -> String {
    // Iterate the borrowed slice directly: no owned copy of the input is needed.
    s.chars()
        .filter(|&c| matches!(c, '\u{03B1}'..='\u{03C9}' | '\u{03F2}'))
        .collect()
}

#[cfg(test)]
mod tests {
    use super::core_text;

    /// Runs `core_text` on a sample carrying verse numbers (16 and 17),
    /// accents, breathings, punctuation and line breaks, and checks that only
    /// the bare lowercase letters (with lunate sigmas) remain.
    #[test]
    fn test_core_text() {
        // Expected output: the trailing backslashes join the pieces without
        // inserting any whitespace.
        let expected = "ειπενδεπαραβοληνπροϲαυτουϲλεγωνανθρωπουτ\
            ινοϲπλουϲιουευφορηϲενηχωρακαιδιελογιζετοενεαυτω\
            λεγωντιποιηϲωοτιουκεχωπουϲυναξωτουϲκαρπουϲμου";

        // Raw input: the line breaks and indentation are part of the string.
        let input = "16 Εἶπεν δὲ παραβολὴν πρὸς αὐτοὺς λέγων·
            ἀνθρώπου τινὸς πλουσίου εὐφόρησεν ἡ χώρα. 17 
            καὶ διελογίζετο ἐν ἑαυτῷ λέγων· τί ποιήσω, ὅτι 
            οὐκ ἔχω ποῦ συνάξω τοὺς καρπούς μου; ";

        assert_eq!(core_text(input.to_owned()), expected);
    }
}