gnt_tools/lib.rs
1//! Tools to help study the Greek New Testament.
2//! The crate is provided AS-IS.
3//! # Examples
4//!
5//! ```
6//! use gnt_tools::core_text;
7//!
8//! let s = "16 Εἶπεν δὲ παραβολὴν πρὸς αὐτοὺς λέγων·
9//! ἀνθρώπου τινὸς πλουσίου εὐφόρησεν ἡ χώρα. 17
10//! καὶ διελογίζετο ἐν ἑαυτῷ λέγων· τί ποιήσω, ὅτι
11//! οὐκ ἔχω ποῦ συνάξω τοὺς καρπούς μου; ";
12//!
13//! let s2 = "ειπενδεπαραβοληνπροϲαυτουϲλεγωνανθρωπουτ\
14//! ινοϲπλουϲιουευφορηϲενηχωρακαιδιελογιζετοενεαυτω\
15//! λεγωντιποιηϲωοτιουκεχωπουϲυναξωτουϲκαρπουϲμου";
16//!
17//! assert_eq!(core_text(String::from(s)), String::from(s2));
18//! ```
19
20use unicode_normalization::UnicodeNormalization;
21
22/// The function gives the core text of a greek new testament critical edition.
23/// It might be useful for comparing greek new testament critical editions by gettig their "core" differences/concordances.
24///
25/// In concrete terms, it remove diacritic signs, remove any character that is not in the greek alphabet, change all sigmas to lunar sigma, and puts all greek letters in lowercase.
26///
27/// So this function :
28/// - does not replace nomina sacras (e.g., κϲ) by their non-abreviated form (resp. κυριοϲ), nor words (e.g., κύριος) by their nomina sacras form (when a nomina sacra form exists) (resp. κϲ).
29/// - is made to delete any character used to encode nomina sacras (e.g., '|', or '(' and ')'), hence |κς| will give κϲ.
30/// - does delete all 'ˉ' characters (so παραβολὴˉ becomes παραβολη, not παραβολην)
31/// # Example :
32/// ```
33/// use gnt_tools::core_text;
34///
35/// let s = "16 Εἶπεν δὲ παραβολὴν πρὸς αὐτοὺς λέγων·
36/// ἀνθρώπου τινὸς πλουσίου εὐφόρησεν ἡ χώρα. 17
37/// καὶ διελογίζετο ἐν ἑαυτῷ λέγων· τί ποιήσω, ὅτι
38/// οὐκ ἔχω ποῦ συνάξω τοὺς καρπούς μου; ";
39///
40/// let s2 = "ειπενδεπαραβοληνπροϲαυτουϲλεγωνανθρωπουτ\
41/// ινοϲπλουϲιουευφορηϲενηχωρακαιδιελογιζετοενεαυτω\
42/// λεγωντιποιηϲωοτιουκεχωπουϲυναξωτουϲκαρπουϲμου";
43///
44/// assert_eq!(core_text(String::from(s)), String::from(s2));
45/// ```
46pub fn core_text(mut s : String) -> String {
47
48 // We remove diacritics signs. Doing it now avoids the greedy_format
49 // function to remove some greek letters (the accentued ones).
50 const LEN: usize = '\u{036f}' as usize - '\u{0300}' as usize;
51 let mut arr = ['\0'; LEN];
52 for (item, ch) in std::iter::zip(&mut arr, '\u{0300}'..='\u{036f}') {
53 *item = ch;
54 }
55 s = s.nfd().to_string().replace(arr, "");
56
57 s = replace(s);
58 greedy_format(s.as_str())
59}
60
/// Lowercases the text and rewrites every sigma as the lunate sigma `ϲ`.
///
/// Lowercasing happens first, so uppercase sigmas are covered as well: they
/// become `σ` or `ς` and are then rewritten like any other sigma.
///
/// The overline `ˉ` (the "invisible nu") is deliberately left untouched here;
/// expanding it to `ν` is not handled yet.
fn replace(s: String) -> String {
    s.to_lowercase()
        .chars()
        .map(|letter| match letter {
            'σ' | 'ς' => 'ϲ',
            other => other,
        })
        .collect()
}
73
/// Keeps only lowercase Greek letters and the lunate sigma.
///
/// Every character outside `α`..=`ω` (U+03B1..=U+03C9) other than `ϲ`
/// (U+03F2) is discarded: digits, whitespace, punctuation, uppercase letters,
/// combining marks and any non-Greek script. Callers are expected to have
/// lowercased the text and stripped diacritics beforehand, otherwise those
/// letters are lost rather than normalized.
///
/// Returns a newly allocated string with the surviving characters in their
/// original order.
fn greedy_format(s: &str) -> String {
    // Iterate the borrowed slice directly; no intermediate copy is needed.
    s.chars()
        .filter(|&c| matches!(c, '\u{03B1}'..='\u{03C9}' | '\u{03F2}'))
        .collect()
}
86
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `core_text` on a passage containing verse numbers, accented
    /// letters, punctuation and line breaks, and checks that only the
    /// lowercase, lunate-sigma letter sequence remains.
    #[test]
    fn test_core_text() {
        let annotated = "16 Εἶπεν δὲ παραβολὴν πρὸς αὐτοὺς λέγων·
 ἀνθρώπου τινὸς πλουσίου εὐφόρησεν ἡ χώρα. 17
 καὶ διελογίζετο ἐν ἑαυτῷ λέγων· τί ποιήσω, ὅτι
 οὐκ ἔχω ποῦ συνάξω τοὺς καρπούς μου; ";

        let expected = "ειπενδεπαραβοληνπροϲαυτουϲλεγωνανθρωπουτ\
            ινοϲπλουϲιουευφορηϲενηχωρακαιδιελογιζετοενεαυτω\
            λεγωντιποιηϲωοτιουκεχωπουϲυναξωτουϲκαρπουϲμου";

        assert_eq!(core_text(annotated.to_owned()), expected);
    }
}
105}