abjad/
lib.rs

1//! This library is meant to facilitate calculating the
2//! [numerical _abjad_ value](https://en.wikipedia.org/wiki/Abjad_numerals)
3//! of a string of text in Arabic or Persian (support for other Arabic-script
4//! languages may be added over time).
5//!
6//! At the moment, this simply adds three methods for `&str`:
7//!
8//! - `abjad` returns a best-effort value, ignoring unrecognized characters.
9//! - `abjad_collect_errors` also records unrecognized characters in a `Vec`.
10//! - `abjad_strict` returns an error as soon as any character is not recognized.
11//!
12
13#![forbid(unsafe_code)]
14#![deny(missing_docs)]
15#![warn(clippy::cargo, clippy::nursery, clippy::pedantic)]
16#![allow(clippy::too_long_first_doc_paragraph)]
17
18use thiserror::Error;
19
20/// The error type for this crate. Currently there is only one member:
21/// `UnrecognizedCharacter`, which is returned by `abjad_strict` upon encountering
22/// any character outside of the Arabic script.
23#[derive(Error, Debug)]
24pub enum AbjadError {
25    /// This error is returned by `abjad_strict` upon encountering any character
26    /// outside of the Arabic script. It reports the Unicode escape sequence for
27    /// the character in question.
28    #[error("Unrecognized character: {0}")]
29    UnrecognizedCharacter(String),
30}
31
32/// We need to allow some options for _abjad_ calculation. At present there are
33/// four: three booleans and one `enum`. All of the booleans are false by default.
34/// The `enum` also has a default value, which should be suitable for the vast
35/// majority of use cases. If you don't need to change any of the options, then,
36/// when calling one of the methods, you can simply pass `AbjadPrefs::default()`.
37#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
38pub struct AbjadPrefs {
39    /// Count the [_shaddah_](https://en.wikipedia.org/wiki/Shadda) diacritic?
40    /// This will have the effect of doubling the value of the preceding letter.
41    pub count_shaddah: bool,
42
43    /// Count [_alif maddah_](https://en.wiktionary.org/wiki/maddah) as a double
44    /// _alif_ (with value 2 instead of 1)?
45    pub double_alif_maddah: bool,
46
47    /// Ignore the pseudo-letter [_hamzah_](https://en.wikipedia.org/wiki/Hamza)
48    /// in its isolated state? (By default we assign it a value of 1.)
49    pub ignore_lone_hamzah: bool,
50
51    /// Which letter order to use: Mashriqi (default) or Maghribi? (Unless you
52    /// are certain that you need the latter, you probably don't.)
53    pub letter_order: LetterOrder,
54}
55
/// The letter order used to assign _abjad_ values. Mashriqi is the default.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum LetterOrder {
    /// Maghribi letter order
    Maghribi,
    /// Mashriqi letter order (default and much more common)
    #[default]
    Mashriqi,
}
66
67/// This is the trait that we implement for `&str`, allowing us to use the new
68/// methods.
69pub trait Abjad {
70    /// This returns a best-effort value, ignoring unrecognized characters.
71    fn abjad(self, prefs: AbjadPrefs) -> u32;
72
73    /// This returns a tuple, with unrecognized characters (Unicode-escaped)
74    /// in a `Vec`.
75    fn abjad_collect_errors(self, prefs: AbjadPrefs) -> (u32, Vec<String>);
76
77    /// # Errors
78    /// This returns an error as soon as any unrecognized character is encountered.
79    fn abjad_strict(self, prefs: AbjadPrefs) -> Result<u32, AbjadError>;
80}
81
82impl Abjad for &str {
83    fn abjad(self, prefs: AbjadPrefs) -> u32 {
84        let mut abjad_total: u32 = 0;
85        let mut last_value: u32 = 0;
86
87        for character in self.chars() {
88            if let Ok(new_value) = get_letter_value(character, last_value, prefs) {
89                abjad_total += new_value;
90                last_value = new_value;
91            } else {
92                last_value = 0;
93            }
94        }
95
96        abjad_total
97    }
98
99    fn abjad_collect_errors(self, prefs: AbjadPrefs) -> (u32, Vec<String>) {
100        let mut abjad_total: u32 = 0;
101        let mut errors: Vec<String> = Vec::new();
102        let mut last_value: u32 = 0;
103
104        for character in self.chars() {
105            if let Ok(new_value) = get_letter_value(character, last_value, prefs) {
106                abjad_total += new_value;
107                last_value = new_value;
108            } else {
109                errors.push(character.escape_unicode().collect());
110                last_value = 0;
111            }
112        }
113
114        (abjad_total, errors)
115    }
116
117    fn abjad_strict(self, prefs: AbjadPrefs) -> Result<u32, AbjadError> {
118        let mut abjad_total: u32 = 0;
119        let mut last_value: u32 = 0;
120
121        for character in self.chars() {
122            let new_value = get_letter_value(character, last_value, prefs)?;
123
124            abjad_total += new_value;
125            last_value = new_value;
126        }
127
128        Ok(abjad_total)
129    }
130}
131
132fn get_letter_value(
133    character: char,
134    last_value: u32,
135    prefs: AbjadPrefs,
136) -> Result<u32, AbjadError> {
137    let maghribi_order = prefs.letter_order == LetterOrder::Maghribi;
138
139    let mut letter_value: u32 = 0;
140
141    match character {
142        'ا' | 'أ' | 'إ' | 'ٱ' => letter_value = 1,
143        'آ' => {
144            if prefs.double_alif_maddah {
145                letter_value = 2;
146            } else {
147                letter_value = 1;
148            }
149        }
150        'ء' => {
151            if !prefs.ignore_lone_hamzah {
152                letter_value = 1;
153            }
154        }
155        'ب' | 'پ' => letter_value = 2,
156        'ج' | 'چ' => letter_value = 3,
157        'د' => letter_value = 4,
158        'ه' | 'ة' | 'ۀ' => letter_value = 5,
159        'و' | 'ؤ' => letter_value = 6,
160        'ز' | 'ژ' => letter_value = 7,
161        'ح' => letter_value = 8,
162        'ط' => letter_value = 9,
163        'ي' | 'ى' | 'ئ' | 'ی' => letter_value = 10,
164        'ك' | 'ک' | 'گ' => letter_value = 20,
165        'ل' => letter_value = 30,
166        'م' => letter_value = 40,
167        'ن' => letter_value = 50,
168        'س' => {
169            if maghribi_order {
170                letter_value = 300;
171            } else {
172                letter_value = 60;
173            }
174        }
175        'ع' => letter_value = 70,
176        'ف' => letter_value = 80,
177        'ص' => {
178            if maghribi_order {
179                letter_value = 60;
180            } else {
181                letter_value = 90;
182            }
183        }
184        'ق' => letter_value = 100,
185        'ر' => letter_value = 200,
186        'ش' => {
187            if maghribi_order {
188                letter_value = 1000;
189            } else {
190                letter_value = 300;
191            }
192        }
193        'ت' => letter_value = 400,
194        'ث' => letter_value = 500,
195        'خ' => letter_value = 600,
196        'ذ' => letter_value = 700,
197        'ض' => {
198            if maghribi_order {
199                letter_value = 90;
200            } else {
201                letter_value = 800;
202            }
203        }
204        'ظ' => {
205            if maghribi_order {
206                letter_value = 800;
207            } else {
208                letter_value = 900;
209            }
210        }
211        'غ' => {
212            if maghribi_order {
213                letter_value = 900;
214            } else {
215                letter_value = 1000;
216            }
217        }
218        // Shaddah diacritic
219        '\u{0651}' => {
220            if prefs.count_shaddah {
221                letter_value = last_value;
222            }
223        }
224        // Space or zwnj is ok
225        ' ' | '\u{200C}' => {}
226        // Otherwise return error
227        _ => {
228            let escaped: String = character.escape_unicode().collect();
229            return Err(AbjadError::UnrecognizedCharacter(escaped));
230        }
231    }
232
233    Ok(letter_value)
234}