raqiya_bible_reference/
lib.rs

1use std::{borrow::Cow, collections::HashMap};
2
3use regex::{Captures, Regex};
4
5mod books;
6
7use books::*;
8
/// Per-text lookup table from a [`BookId`] to a tuple holding the book's
/// abbreviation in that text and the number of chapters the book has.
type BookHashMap = HashMap<BookId, (&'static str, u8)>;
10
/// Identifies a book of the Bible independently of any particular text.
///
/// Variants are declared in canonical order, so the derived ordering
/// (`PartialOrd`/`Ord`) sorts books canonically: an earlier book compares
/// as less than a later one.
#[derive(Clone, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)]
#[repr(u8)]
pub enum BookId {
    // All book IDs are listed in the canonical order,
    // from the Old Testament to the New Testament.
    // Keep this order: the derived `Ord` implementation depends on it.
    Genesis,
    Exodus,
    Leviticus,
    Numbers,
    Deuteronomy,
    Matthew,
    John,
}
24impl BookId {
25    fn find_by_sanitized_abbreviation<'a>(
26        text: &TextId,
27        abbreviation: &String,
28    ) -> Option<&'a BookId> {
29        match text {
30            TextId::EnLSB => BOOK_ABBREVIATIONS_TO_IDS_EN.get(abbreviation.as_str()),
31            TextId::FiR1933_38 => BOOK_ABBREVIATIONS_TO_IDS_FI.get(abbreviation.as_str()),
32        }
33    }
34}
35
36struct BookInfo;
37impl BookInfo {
38    fn get_by_book_id_and_text(book_id: &BookId, text: &TextId) -> Option<(&'static str, u8)> {
39        match text {
40            TextId::EnLSB => BOOK_INFO_FOR_EN_LSB.get(book_id).copied(),
41            TextId::FiR1933_38 => BOOK_INFO_FOR_FI_R1933_38.get(book_id).copied(),
42        }
43    }
44    pub fn sanitize(value: &str) -> String {
45        value.to_lowercase()
46    }
47}
48
49/// Represents a reference to a Bible passage via chapter and or more verses.
50#[derive(Debug)]
51pub enum Reference {
52    BookChapter(BookId, u8),
53    BookChapterNumber(BookId, u8, u8),
54    BookChapterNumberFromTo(BookId, u8, u8, u8),
55}
56impl Reference {
57    /// TODO: Optimize by avoiding match statement and just get data somewhere directly.
58    pub fn get_book_abbreviation(&self, text: &TextId) -> &'static str {
59        let book_id = &match self {
60            Self::BookChapter(book_id, _) => book_id.clone(),
61            Self::BookChapterNumber(book_id, _, _) => book_id.clone(),
62            Self::BookChapterNumberFromTo(book_id, _, _, _) => book_id.clone(),
63        };
64        let book_info = match text {
65            TextId::EnLSB => BOOK_INFO_FOR_EN_LSB.get(book_id),
66            TextId::FiR1933_38 => BOOK_INFO_FOR_FI_R1933_38.get(book_id),
67        };
68
69        // Book info should always be found. That is whu unwrapping is performed here
70        book_info.unwrap().0
71    }
72    /// TODO: Optimize by avoiding match statement and just get data somewhere directly.
73    #[inline]
74    pub fn get_chapter(&self) -> u8 {
75        match self {
76            Self::BookChapter(_, chapter) => *chapter,
77            Self::BookChapterNumber(_, chapter, _) => *chapter,
78            Self::BookChapterNumberFromTo(_, chapter, _, _) => *chapter,
79        }
80    }
81    /// TODO: Optimize by avoiding match statement and just get data somewhere directly.
82    #[inline]
83    pub fn get_number(&self) -> Option<u8> {
84        match self {
85            Self::BookChapterNumber(_, _, number) => Some(*number),
86            _ => None,
87        }
88    }
89    pub fn to_string(&self, text: &TextId) -> String {
90        static UNDEFINED: &'static str = "undefined";
91
92        match self {
93            Self::BookChapter(book_id, chapter) => {
94                if let Some((abbreviation, _)) = BookInfo::get_by_book_id_and_text(book_id, text) {
95                    format!("{} {}", abbreviation, chapter)
96                } else {
97                    format!("{} {}", UNDEFINED, chapter)
98                }
99            }
100            Self::BookChapterNumber(book_id, chapter, number) => {
101                if let Some((abbreviation, _)) = BookInfo::get_by_book_id_and_text(book_id, text) {
102                    format!("{} {}:{}", abbreviation, chapter, number)
103                } else {
104                    format!("{} {}:{}", UNDEFINED, chapter, number)
105                }
106            }
107            Self::BookChapterNumberFromTo(book_id, chapter, number_from, number_to) => {
108                if let Some((abbreviation, _)) = BookInfo::get_by_book_id_and_text(book_id, text) {
109                    format!("{} {}:{}-{}", abbreviation, chapter, number_from, number_to)
110                } else {
111                    format!("{} {}:{}-{}", UNDEFINED, chapter, number_from, number_to)
112                }
113            }
114        }
115    }
116}
117
/// A Bible passage reference located inside a larger piece of content.
///
/// The type is a cheap view (a string slice plus an offset), so it is `Copy`
/// and can be compared directly.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ReferenceMatch<'a> {
    /// The matched reference exactly as it appears in the searched content.
    pub content: &'a str,
    /// Offset in the searched content at which the match starts, as reported
    /// by the regex match (a byte offset, not a character index).
    pub position: usize,
}
123
124fn find_book_info_by_text(text: &TextId) -> &BookHashMap {
125    match text {
126        TextId::EnLSB => &BOOK_INFO_FOR_EN_LSB,
127        TextId::FiR1933_38 => &BOOK_INFO_FOR_FI_R1933_38,
128    }
129}
130/// Finds all Bible passage references in a given value with their content and position.
131/// Only those book abbreviations included in a given text are supported.
132pub fn find_reference_matches_in<'a>(content: &'a str, text: &TextId) -> Vec<ReferenceMatch<'a>> {
133    let re = make_reference_match_pattern(text);
134    re.captures_iter(content)
135        .map(|captures| {
136            let capture = captures.get(0).unwrap();
137            ReferenceMatch {
138                content: capture.as_str(),
139                position: capture.start(),
140            }
141        })
142        .collect::<Vec<_>>()
143}
144fn make_reference_match_pattern(text: &TextId) -> Regex {
145    let abbreviations = match text {
146        TextId::EnLSB => BOOK_ABBREVIATIONS_TO_IDS_EN.keys(),
147        TextId::FiR1933_38 => BOOK_ABBREVIATIONS_TO_IDS_FI.keys(),
148    };
149
150    let match_pattern = {
151        let abbreviations_in_pattern = abbreviations.map(|a| *a).collect::<Vec<_>>().join("|");
152        let chapter_pattern = "\\s\\d{1,}";
153        let chapter_and_number_pattern = "\\s\\d{1,}:\\d{1,}";
154        let abbreviations_and_chapter_and_number_in_pattern = format!(
155            "({})({}|{})",
156            abbreviations_in_pattern, chapter_and_number_pattern, chapter_pattern
157        );
158        format!("(?i)({})", abbreviations_and_chapter_and_number_in_pattern)
159    };
160
161    let re = Regex::new(match_pattern.as_str()).unwrap();
162    re
163}
164/// Parses a single reference from a string by a given text for the Bible.
165///
166/// Parsing takes into consideration number of chapters found in a book.
167/// If the given chapter exceeds the number of chapters, parsing a reference fails.
168pub fn parse_reference_by_text(reference: &str, text: &TextId) -> Option<Reference> {
169    let reference = reference.to_string();
170    let parts = reference.trim().split(" ").collect::<Vec<_>>();
171    match parts.len() {
172        2 => {
173            // Construct book abbreviation
174            let part_as_sanitized_book_abbreviation = BookInfo::sanitize(parts[0]);
175
176            let Some(book_id) =
177                BookId::find_by_sanitized_abbreviation(text, &part_as_sanitized_book_abbreviation) else {
178                    return None;
179                };
180
181            let Some((_, chapter_count)) = find_book_info_by_text(text).get(book_id) else {
182                return None;
183            };
184
185            // Construct chapter or chapter and number if there is a separator ':' between integers
186            match parts[1].split(":").collect::<Vec<_>>()[..] {
187                [chapter] => {
188                    let Ok(chapter_num) = chapter.parse::<u8>() else {
189                        return None;
190                    };
191
192                    if chapter_num < 1 || chapter_num > *chapter_count {
193                        return None;
194                    }
195
196                    Some(Reference::BookChapter(book_id.clone(), chapter_num))
197                }
198                [chapter, number] => {
199                    let Ok(chapter_num) = chapter.parse::<u8>() else {
200                        return None;
201                    };
202
203                    if chapter_num < 1 || chapter_num > *chapter_count {
204                        return None;
205                    }
206
207                    // Construct number or number from and number to if there is a separator '-' between integers
208                    match number.split("-").collect::<Vec<_>>()[..] {
209                        [number] => {
210                            let Ok(number_num) = number.parse::<u8>() else {
211                                return None;
212                            };
213
214                            Some(Reference::BookChapterNumber(
215                                book_id.clone(),
216                                chapter_num,
217                                number_num,
218                            ))
219                        }
220                        [number_from, number_to] => {
221                            let Ok(number_from_num) = number_from.parse::<u8>() else {
222                                return None;
223                            };
224                            let Ok(number_to_num) = number_to.parse::<u8>() else {
225                                return None;
226                            };
227
228                            Some(Reference::BookChapterNumberFromTo(
229                                book_id.clone(),
230                                chapter_num,
231                                number_from_num,
232                                number_to_num,
233                            ))
234                        }
235                        _ => None,
236                    }
237                }
238                _ => None,
239            }
240        }
241        _ => None,
242    }
243}
244/// The same as [parse_reference_by_text] except it parses and
245/// returns multiple references which are separated by a semicolon character (';').
246pub fn parse_references_by_text(reference: &str, text: &TextId) -> Vec<Option<Reference>> {
247    let s: String = reference.into();
248    s.split(";")
249        .map(|part| parse_reference_by_text(part, text))
250        .collect::<Vec<_>>()
251}
252/// Replaces all references found with a corresponding reference found according to a given text.
253///
254/// In case a replacement reference for the original reference cannot be parsed, the original reference remains.
255pub fn replace_reference_matches_in<'a, Replacer>(
256    content: &'a str,
257    text: &'a TextId,
258    replacer: Replacer,
259) -> Cow<'a, str>
260where
261    Replacer: Fn(&Reference) -> String,
262{
263    let re = make_reference_match_pattern(text);
264    let content_with_replacements =
265        re.replace_all(content.into(), |captures: &Captures| -> String {
266            let capture_content = captures.get(0).unwrap().as_str();
267            if let Some(reference) = parse_reference_by_text(capture_content, text) {
268                replacer(&reference)
269            } else {
270                capture_content.to_string()
271            }
272        });
273    content_with_replacements
274}
275
/// Represents a text containing Bible content.
/// It can be
/// - a critical edition of the Old Testament (like BHS, Biblia Hebraica Stuttgartensia)
/// - a critical edition of the New Testament (like NA28, Nestle-Aland Novum Testamentum Graece 28)
/// - a partial translation of the Bible (like Septuagint, which contains only the Old Testament in Greek).
/// - a complete translation of the Bible (like LSB, Legacy Standard Bible, an English translation).
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum TextId {
    /// Legacy Standard Bible, an English translation; identified by `"LSB"`.
    EnLSB,
    /// The text identified by `"1933/-38"`.
    // NOTE(review): presumably the Finnish 1933/38 translation, judging by the
    // `Fi` prefix; confirm before documenting it as such.
    FiR1933_38,
}
impl TextId {
    /// Finds the text whose identifier is exactly `value`.
    ///
    /// The comparison is case-sensitive. Returns `None` for an unknown
    /// identifier.
    pub fn find_by_str(value: &str) -> Option<Self> {
        match value {
            "1933/-38" => Some(TextId::FiR1933_38),
            "LSB" => Some(TextId::EnLSB),
            _ => None,
        }
    }

    /// Same as [`TextId::find_by_str`], for callers that hold a `String`.
    // The `&String` parameter is kept so that existing callers keep compiling.
    #[allow(clippy::ptr_arg)]
    pub fn find_by_string(value: &String) -> Option<Self> {
        Self::find_by_str(value)
    }
}
299
#[cfg(test)]
mod tests {
    use super::{
        find_reference_matches_in, parse_reference_by_text, parse_references_by_text,
        replace_reference_matches_in, BookId, Reference, TextId,
    };

    /// Evaluates `$extracted_value` with the bindings of `$pattern` when
    /// `$value` matches it, and panics otherwise.
    macro_rules! unwrap_enum_variant {
        ($value:expr, $pattern:pat => $extracted_value:expr) => {
            match $value {
                $pattern => $extracted_value,
                _ => panic!("Given pattern does not match!"),
            }
        };
    }

    #[test]
    fn convert_valid_reference_from_one_text_to_another() {
        let reference = parse_reference_by_text("Joh 1", &TextId::FiR1933_38).unwrap();
        let result = reference.to_string(&TextId::EnLSB);
        assert_eq!(result, "John 1");
    }
    #[test]
    fn fail_parse_reference_with_book_and_chapter_when_reference_contains_chapter_that_does_not_exist(
    ) {
        let text = TextId::FiR1933_38;

        let reference = parse_reference_by_text("Joh 0", &text);
        assert!(reference.is_none());

        let reference = parse_reference_by_text("Joh 22", &text);
        assert!(reference.is_none());
    }
    #[test]
    fn fail_parse_reference_with_book_and_chapter_when_reference_is_incorrect() {
        let text = TextId::FiR1933_38;

        macro_rules! test_book_and_chapter {
            ($reference: literal) => {
                let reference = parse_reference_by_text($reference, &text);
                assert!(reference.is_none());
            };
        }

        test_book_and_chapter!("1");
        test_book_and_chapter!("Nothing");
        test_book_and_chapter!("Matt");
        test_book_and_chapter!("Mat. 1");
    }
    #[test]
    fn find_references_in_str() {
        let matches = find_reference_matches_in("Example Matt. 3 (Joh 12:24)", &TextId::FiR1933_38);

        assert_eq!(matches.len(), 2);

        assert_eq!(matches[0].content, "Matt. 3");
        assert_eq!(matches[0].position, 8);

        assert_eq!(matches[1].content, "Joh 12:24");
        assert_eq!(matches[1].position, 17);
    }
    #[test]
    fn replace_references_in_str() {
        let to_english = |reference: &Reference| reference.to_string(&TextId::EnLSB);

        // A reference that parses is rewritten by the replacer.
        let result = replace_reference_matches_in("(Joh 12:24)", &TextId::FiR1933_38, to_english);
        assert_eq!(result, "(John 12:24)");

        // A match that does not parse (there is no chapter 22) is left as it is.
        let result = replace_reference_matches_in("(Joh 22)", &TextId::FiR1933_38, to_english);
        assert_eq!(result, "(Joh 22)");
    }
    #[test]
    fn parse_multiple_references_with_book_and_chapter_when_references_are_correct() {
        let references = parse_references_by_text("Matt 1; Joh. 1", &TextId::FiR1933_38);

        unwrap_enum_variant!(references[0].as_ref().unwrap(), Reference::BookChapter(book_id, chapter) => {
            assert_eq!(*book_id, BookId::Matthew);
            assert_eq!(*chapter, 1);
        });
        unwrap_enum_variant!(references[1].as_ref().unwrap(), Reference::BookChapter(book_id, chapter) => {
            assert_eq!(*book_id, BookId::John);
            assert_eq!(*chapter, 1);
        });
    }
    #[test]
    fn parse_multiple_references_with_book_and_chapter_and_number_when_references_are_correct() {
        let references = parse_references_by_text("Matt 19:18; Joh. 11:12", &TextId::FiR1933_38);

        unwrap_enum_variant!(references[0].as_ref().unwrap(), Reference::BookChapterNumber(book_id, chapter, number) => {
            assert_eq!(*book_id, BookId::Matthew);
            assert_eq!(*chapter, 19);
            assert_eq!(*number, 18);
        });
        unwrap_enum_variant!(references[1].as_ref().unwrap(), Reference::BookChapterNumber(book_id, chapter, number) => {
            assert_eq!(*book_id, BookId::John);
            assert_eq!(*chapter, 11);
            assert_eq!(*number, 12);
        });
    }
    #[test]
    fn parse_reference_with_book_and_chapter_when_reference_is_correct() {
        let text = TextId::FiR1933_38;

        macro_rules! test_book_and_chapter {
            ($reference: literal, $bookId: ident, $chapter:literal) => {
                let reference = parse_reference_by_text($reference, &text).unwrap();

                unwrap_enum_variant!(reference, Reference::BookChapter(book_id, chapter) => {
                    assert_eq!(book_id, BookId::$bookId);
                    assert_eq!(chapter, $chapter);
                });
            };
        }

        test_book_and_chapter!("matt 1", Matthew, 1);
        test_book_and_chapter!("Matt. 1", Matthew, 1);
        test_book_and_chapter!("Matt. 10", Matthew, 10);
        test_book_and_chapter!("Joh. 1", John, 1);
    }
    #[test]
    fn parse_reference_with_book_and_chapter_and_number_when_reference_is_correct() {
        let text = TextId::FiR1933_38;

        let reference = parse_reference_by_text("Joh 1:1", &text);

        unwrap_enum_variant!(reference.unwrap(), Reference::BookChapterNumber(book_id, chapter, number) => {
            assert_eq!(book_id, BookId::John);
            assert_eq!(chapter, 1);
            assert_eq!(number, 1);
        });

        let reference = parse_reference_by_text("Joh 20:23", &text);

        unwrap_enum_variant!(reference.unwrap(), Reference::BookChapterNumber(book_id, chapter, number) => {
            assert_eq!(book_id, BookId::John);
            assert_eq!(chapter, 20);
            assert_eq!(number, 23);
        });
    }
    #[test]
    fn parse_reference_with_book_and_chapter_and_number_from_and_number_to_when_reference_is_correct(
    ) {
        let text = TextId::FiR1933_38;

        let reference = parse_reference_by_text("Joh 1:3-8", &text);

        unwrap_enum_variant!(reference.unwrap(), Reference::BookChapterNumberFromTo(book_id, chapter, number_from, number_to) => {
            assert_eq!(book_id, BookId::John);
            assert_eq!(chapter, 1);
            assert_eq!(number_from, 3);
            assert_eq!(number_to, 8);
        });

        let reference = parse_reference_by_text("Joh 20:15-27", &text);

        unwrap_enum_variant!(reference.unwrap(), Reference::BookChapterNumberFromTo(book_id, chapter, number_from, number_to) => {
            assert_eq!(book_id, BookId::John);
            assert_eq!(chapter, 20);
            assert_eq!(number_from, 15);
            assert_eq!(number_to, 27);
        });
    }
}