gnt_tools/lib.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
//! This crate is a toolbox for studying the Greek New Testament.
//! The crate is provided AS-IS.
//! # Examples
//!
//! ```
//! use gnt_tools::core_text;
//!
//! let s = "16 Εἶπεν δὲ παραβολὴν πρὸς αὐτοὺς λέγων·
//! ἀνθρώπου τινὸς πλουσίου εὐφόρησεν ἡ χώρα. 17
//! καὶ διελογίζετο ἐν ἑαυτῷ λέγων· τί ποιήσω, ὅτι
//! οὐκ ἔχω ποῦ συνάξω τοὺς καρπούς μου; ";
//!
//! let s2 = "ειπενδεπαραβοληνπροϲαυτουϲλεγωνανθρωπουτ\
//! ινοϲπλουϲιουευφορηϲενηχωρακαιδιελογιζετοενεαυτω\
//! λεγωντιποιηϲωοτιουκεχωπουϲυναξωτουϲκαρπουϲμου";
//!
//! assert_eq!(core_text(String::from(s)), String::from(s2));
//! ```
use unicode_normalization::UnicodeNormalization;
/*
Even once a greedy function has been developed (cf. below),
this format function may still be useful for detecting
an uncommon character in the Greek text (by comparing its
result with the greedy result).
*/
/// Returns the "core" text of a Greek New Testament critical edition.
///
/// The result keeps only lowercase Greek letters, with every sigma written
/// as the lunate sigma `ϲ`. It can be useful for comparing critical editions
/// by reducing them to their core differences/concordances.
///
/// Processing steps, in order:
/// 1. The input is decomposed (NFD) and every combining mark in
///    `U+0300..=U+036F` is dropped, so accented letters are reduced to their
///    base letter instead of being discarded by the final filter.
/// 2. `ˉ` is replaced by `ν`, the text is lowercased, and `σ`/`ς` become `ϲ`.
/// 3. Every character that is not a lowercase Greek letter (`α`..=`ω`) or
///    `ϲ` is removed.
///
/// Notes on this function:
/// - it does not replace nomina sacra (e.g., κϲ) with their unabbreviated
///   form (resp. κυριοϲ), nor words (e.g., κύριος) with their nomen sacrum
///   form (when such a form exists) (resp. κϲ).
/// - it deletes any character used to encode nomina sacra (e.g., '|', or
///   '(' and ')'), hence |κς| will give κϲ.
///
/// # Examples
/// ```
/// use gnt_tools::core_text;
///
/// let s = "16 Εἶπεν δὲ παραβολὴν πρὸς αὐτοὺς λέγων·
/// ἀνθρώπου τινὸς πλουσίου εὐφόρησεν ἡ χώρα. 17
/// καὶ διελογίζετο ἐν ἑαυτῷ λέγων· τί ποιήσω, ὅτι
/// οὐκ ἔχω ποῦ συνάξω τοὺς καρπούς μου; ";
///
/// let s2 = "ειπενδεπαραβοληνπροϲαυτουϲλεγωνανθρωπουτ\
/// ινοϲπλουϲιουευφορηϲενηχωρακαιδιελογιζετοενεαυτω\
/// λεγωντιποιηϲωοτιουκεχωπουϲυναξωτουϲκαρπουϲμου";
///
/// assert_eq!(core_text(String::from(s)), String::from(s2));
/// ```
pub fn core_text(s: String) -> String {
    // Strip diacritics first: after NFD decomposition each accent is a
    // separate combining mark, so removing the marks leaves the bare letter.
    // The range is inclusive on both ends, so U+036F is stripped as well.
    let without_diacritics: String = s
        .nfd()
        .filter(|c| !('\u{0300}'..='\u{036f}').contains(c))
        .collect();

    let normalised = replace(without_diacritics);
    greedy_format(&normalised)
}
/// Applies the letter-level substitutions used to build the core text.
///
/// In order: every `ˉ` becomes `ν`, the whole string is lowercased, and
/// both sigma forms (`σ` and `ς`) are rewritten as the lunate sigma `ϲ`.
fn replace(s: String) -> String {
    // An "invisible nu" written as a macron stands for a real nu.
    let with_nu = s.replace("ˉ", "ν");

    // Lowercase before touching sigmas so uppercase sigmas are covered too.
    let lowered = with_nu.to_lowercase();

    // Medial and final sigma both collapse to the lunate sigma.
    lowered.replace(|c: char| matches!(c, 'σ' | 'ς'), "ϲ")
}
/// Keeps only lowercase Greek letters and the lunate sigma.
///
/// A character is retained when it lies in `U+03B1..=U+03C9` (`α` to `ω`,
/// a range that also contains the final sigma `ς`) or is `U+03F2` (`ϲ`).
/// Everything else (digits, punctuation, whitespace, uppercase or
/// precomposed accented letters, non-Greek text) is removed.
fn greedy_format(s: &str) -> String {
    // Iterate the borrowed text directly; no intermediate copy is needed.
    s.chars()
        .filter(|&c| matches!(c, '\u{03B1}'..='\u{03C9}' | '\u{03F2}'))
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A two-verse passage containing verse numbers, punctuation, accents,
    /// breathings, whitespace and line breaks must reduce to a single run of
    /// lowercase letters written with the lunate sigma.
    #[test]
    fn test_core_text() {
        let input = "16 Εἶπεν δὲ παραβολὴν πρὸς αὐτοὺς λέγων·
ἀνθρώπου τινὸς πλουσίου εὐφόρησεν ἡ χώρα. 17
καὶ διελογίζετο ἐν ἑαυτῷ λέγων· τί ποιήσω, ὅτι
οὐκ ἔχω ποῦ συνάξω τοὺς καρπούς μου; ";

        let expected = "ειπενδεπαραβοληνπροϲαυτουϲλεγωνανθρωπουτ\
                        ινοϲπλουϲιουευφορηϲενηχωρακαιδιελογιζετοενεαυτω\
                        λεγωντιποιηϲωοτιουκεχωπουϲυναξωτουϲκαρπουϲμου";

        let actual = core_text(input.to_string());
        assert_eq!(actual, expected.to_string());
    }
}