ar_reshaper/
config.rs

1use crate::{form::LettersType, ligatures::*, ArabicReshaper};
2
/// Flags selecting which groups of ligatures a [ReshaperConfig] turns on.
#[derive(Debug, Clone, Copy)]
pub struct LigaturesFlags {
    /// Turn on the small set of default ligatures.
    #[cfg_attr(
        feature = "ttf-parser",
        doc = "We dont check and enable default ligatures when loading ligatures from font"
    )]
    pub default_ligatures: bool,
    /// Turn on the whole sentences group.
    pub sentences_ligatures: bool,
    /// Turn on the whole words group.
    pub words_ligatures: bool,
    /// Turn on the whole letters group.
    pub letters_ligatures: bool,
}

impl LigaturesFlags {
    /// Only the default ligatures: `default_ligatures` is set, every group
    /// flag stays off.
    pub const fn default() -> Self {
        let mut flags = Self::none();
        flags.default_ligatures = true;
        flags
    }

    /// Every group (sentences, words, letters) is switched on.
    ///
    /// `default_ligatures` itself is left off.
    pub const fn all() -> Self {
        let mut flags = Self::none();
        flags.sentences_ligatures = true;
        flags.words_ligatures = true;
        flags.letters_ligatures = true;
        flags
    }

    /// Nothing is switched on.
    pub const fn none() -> Self {
        Self {
            default_ligatures: false,
            sentences_ligatures: false,
            words_ligatures: false,
            letters_ligatures: false,
        }
    }

    /// Returns `true` when none of the three group flags is set.
    ///
    /// Only `sentences_ligatures`, `words_ligatures` and `letters_ligatures`
    /// are consulted; `default_ligatures` does not influence the result.
    pub const fn is_none_enabled(&self) -> bool {
        !(self.sentences_ligatures || self.words_ligatures || self.letters_ligatures)
    }
}
52
53/// Supported languages
54#[derive(Clone, Copy, Debug, Default, PartialEq)]
55#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
56pub enum Language {
57    /// `Arabic` is default and recommended to work in most of the cases
58    #[default]
59    Arabic,
60    /// `ArabicV2` is only to be used with certain font that you run into missing chars
61    ArabicV2,
62    /// `Kurdish` if you are using Kurdish Sarchia font is recommended,
63    /// work with both unicode and classic Arabic-Kurdish keybouard
64    Kurdish,
65    /// Custom language
66    #[cfg_attr(feature = "serde", serde(skip))] // we can't serialize this
67    Custom(&'static [LettersType]),
68}
69
70impl core::fmt::Display for Language {
71    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
72        match self {
73            Language::Arabic => "Arabic",
74            Language::ArabicV2 => "ArabicV2",
75            Language::Kurdish => "Kurdish",
76            Language::Custom(_) => "Custom",
77        }
78        .fmt(f)
79    }
80}
81
82/// Hold state of whatever some ligatures are enabled or not.
83#[derive(Clone)]
84#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
85pub struct Ligatures {
86    #[cfg_attr(feature = "serde", serde(with = "arrays"))]
87    pub(crate) list: [bool; LIGATURES.len()],
88}
89
90impl Ligatures {
91    /// Create a new [Ligatures] with all the ligatures disabled
92    pub const fn empty() -> Self {
93        Self {
94            list: [false; LIGATURES.len()],
95        }
96    }
97
98    /// Enable some default ligatures
99    pub const fn default() -> Self {
100        let mut ligatures = Self::empty();
101
102        ligatures.list[LigatureNames::ARABIC_LIGATURE_ALLAH as usize] = true;
103        ligatures.list[LigatureNames::ARABIC_LIGATURE_LAM_WITH_ALEF as usize] = true;
104        ligatures.list[LigatureNames::ARABIC_LIGATURE_LAM_WITH_ALEF_WITH_HAMZA_ABOVE as usize] =
105            true;
106        ligatures.list[LigatureNames::ARABIC_LIGATURE_LAM_WITH_ALEF_WITH_HAMZA_BELOW as usize] =
107            true;
108        ligatures.list[LigatureNames::ARABIC_LIGATURE_LAM_WITH_ALEF_WITH_MADDA_ABOVE as usize] =
109            true;
110
111        ligatures
112    }
113
114    /// Is any ligature enabled
115    const fn is_any_enabled(&self) -> bool {
116        let mut idx = 0;
117        while idx < self.list.len() {
118            if self.list[idx] {
119                return true;
120            }
121            idx += 1;
122        }
123        false
124    }
125
126    /// Is the input ligature enabled
127    pub const fn is_ligature_enabled(&self, name: LigatureNames) -> bool {
128        self.list[name as usize]
129    }
130}
131
132/// The main Config struct for the [ArabicReshaper]
133///
134/// You can change all kinds of settings about [ArabicReshaper] using this struct.
135#[derive(Clone)]
136#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
137pub struct ReshaperConfig {
138    /// Supported languages are: **Arabic, ArabicV2, Kurdish**
139    /// More languages might be supported soon.
140    pub language: Language,
141    /// Whether to delete the Harakat (Tashkeel) before reshaping or not.
142    pub delete_harakat: bool,
143    /// Whether to shift the Harakat (Tashkeel) one position so they appear
144    /// correctly when string is reversed
145    pub shift_harakat_position: bool,
146    /// Whether to delete the Tatweel (U+0640) before reshaping or not.
147    pub delete_tatweel: bool,
148    /// Whether to support ZWJ (U+200D) or not.
149    pub support_zwj: bool,
150    /// Use unshaped form instead of isolated form.
151    pub use_unshaped_instead_of_isolated: bool,
152    /// Whether to use ligatures or not.
153    /// Serves as a shortcut to disable all ligatures.
154    pub support_ligatures: bool,
155    /// When `support_ligatures` is enabled.
156    /// Separate ligatures configuration take precedence over it.
157    /// When `support_ligatures` is disabled,
158    /// separate ligatures configurations are ignored.
159    pub ligatures: Ligatures,
160}
161
162impl Default for ReshaperConfig {
163    fn default() -> Self {
164        let mut ligatures = Ligatures::empty();
165        // enable some default ones
166        for name in [
167            LigatureNames::ARABIC_LIGATURE_ALLAH,
168            LigatureNames::ARABIC_LIGATURE_LAM_WITH_ALEF,
169            LigatureNames::ARABIC_LIGATURE_LAM_WITH_ALEF_WITH_HAMZA_ABOVE,
170            LigatureNames::ARABIC_LIGATURE_LAM_WITH_ALEF_WITH_HAMZA_BELOW,
171            LigatureNames::ARABIC_LIGATURE_LAM_WITH_ALEF_WITH_MADDA_ABOVE,
172        ] {
173            ligatures.list[name as usize] = true;
174        }
175
176        Self {
177            language: Default::default(),
178            delete_harakat: true,
179            shift_harakat_position: false,
180            delete_tatweel: false,
181            support_zwj: true,
182            use_unshaped_instead_of_isolated: false,
183            support_ligatures: true,
184            ligatures,
185        }
186    }
187}
188
189impl ReshaperConfig {
190    /// Create a new [ReshaperConfig] with the given [LigaturesFlags].
191    pub const fn new(language: Language, ligatures_flags: LigaturesFlags) -> Self {
192        let mut ligatures = if ligatures_flags.default_ligatures {
193            Ligatures::default()
194        } else {
195            Ligatures::empty()
196        };
197
198        if !ligatures_flags.is_none_enabled() {
199            let LigaturesFlags {
200                sentences_ligatures,
201                words_ligatures,
202                letters_ligatures,
203                ..
204            } = ligatures_flags;
205
206            macro_rules! enable_ligatures {
207                ($range:expr) => {{
208                    let mut idx = $range.start;
209                    while idx < $range.end {
210                        ligatures.list[idx] = true;
211                        idx += 1;
212                    }
213                }};
214            }
215
216            if sentences_ligatures {
217                enable_ligatures!(SENTENCES_LIGATURES_RANGE)
218            }
219            if words_ligatures {
220                enable_ligatures!(WORDS_LIGATURES_RANGE)
221            }
222            if letters_ligatures {
223                enable_ligatures!(LETTERS_LIGATURES_RANGE)
224            }
225        }
226
227        Self {
228            language,
229            support_ligatures: !ligatures_flags.is_none_enabled(),
230            ligatures,
231            delete_harakat: true,
232            shift_harakat_position: false,
233            delete_tatweel: false,
234            support_zwj: true,
235            use_unshaped_instead_of_isolated: false,
236        }
237    }
238
239    /// Create a new [ArabicReshaper] from the config.
240    pub fn to_reshaper(self) -> ArabicReshaper {
241        ArabicReshaper::new(self)
242    }
243
244    /// Create a new [ReshaperConfig] based on the input **true type font** font.\
245    /// Keep in mind that we are currently using `ttf-parser` crate for parsing ttf
246    /// files, this crate doesn't support cmap8, this may change in future.
247    // This function need more testing, I haven't tested well yet.
248    #[cfg(feature = "ttf-parser")]
249    pub fn from_font(
250        bytes: &[u8],
251        language: Language,
252        ligatures_flags: LigaturesFlags,
253    ) -> Result<Self, alloc::string::String> {
254        use crate::{form::Forms, letters::Letters};
255        use alloc::{string::ToString, vec::Vec};
256        use ttf_parser::Face;
257
258        let font = Face::parse(bytes, 0).map_err(|e| e.to_string())?;
259
260        let mut config = Self {
261            support_ligatures: !ligatures_flags.is_none_enabled(),
262            ..Default::default()
263        };
264
265        if let Some(tables) = font.tables().cmap {
266            'top: for (_, v) in Letters::new(language).0 {
267                for table in tables.subtables {
268                    if v.isolated != '\0' && table.glyph_index(v.isolated as _).is_some() {
269                        continue 'top;
270                    }
271                }
272                config.use_unshaped_instead_of_isolated = true;
273                break;
274            }
275
276            let mut process_ligatures = |ligatures: &[(&'static [&'static str], Forms)]| {
277                for (idx, (_, forms)) in ligatures.iter().enumerate() {
278                    let forms: Vec<_> = [forms.isolated, forms.initial, forms.medial, forms.end]
279                        .into_iter()
280                        .filter(|c| *c != '\0')
281                        .collect();
282                    let mut n = forms.len();
283                    for form in forms {
284                        for table in tables.subtables {
285                            // we are filtering empty string, so it should be ok to just unwrap
286                            if table.glyph_index(form as _).is_some() {
287                                n -= 1;
288                                break;
289                            }
290                        }
291                    }
292                    config.ligatures.list[idx] = n == 0;
293                }
294            };
295
296            if !ligatures_flags.is_none_enabled() {
297                let LigaturesFlags {
298                    sentences_ligatures,
299                    words_ligatures,
300                    letters_ligatures,
301                    ..
302                } = ligatures_flags;
303
304                for (enabled, range) in [
305                    (sentences_ligatures, SENTENCES_LIGATURES_RANGE),
306                    (words_ligatures, WORDS_LIGATURES_RANGE),
307                    (letters_ligatures, LETTERS_LIGATURES_RANGE),
308                ] {
309                    if enabled {
310                        process_ligatures(&LIGATURES[range]);
311                    }
312                }
313            }
314        } else {
315            config.use_unshaped_instead_of_isolated = true;
316            // Im not sure what should I do here, but the best thing that I can think of right now is
317            // to disable all the ligatures
318            config
319                .ligatures
320                .list
321                .iter_mut()
322                .for_each(|enabled| *enabled = false);
323        }
324
325        Ok(config)
326    }
327
328    /// Update the given [LigatureNames].
329    pub fn update_ligature(&mut self, name: LigatureNames, enable: bool) {
330        self.ligatures.list[name as usize] = enable;
331        // enable or disable ligatures if anything is enabled
332        self.support_ligatures = self.ligatures.is_any_enabled();
333    }
334}
335
/// A simple hack to serialize and deserialize arrays that are bigger than 32.
/// We have to use this because serde doesn't support const generics in array sizes.
///
/// Meant to be plugged in with `#[serde(with = "arrays")]`: the array is
/// written as a tuple of `N` elements and read back the same way.
#[cfg(feature = "serde")]
mod arrays {
    use alloc::vec::Vec;
    use core::{convert::TryInto, marker::PhantomData};

    use serde::{
        de::{SeqAccess, Visitor},
        ser::SerializeTuple,
        Deserialize, Deserializer, Serialize, Serializer,
    };

    /// Serialize `data` as a tuple of exactly `N` elements.
    pub fn serialize<S: Serializer, T: Serialize, const N: usize>(
        data: &[T; N],
        ser: S,
    ) -> Result<S::Ok, S::Error> {
        let mut s = ser.serialize_tuple(N)?;
        for item in data {
            s.serialize_element(item)?;
        }
        s.end()
    }

    /// Visitor that collects exactly `N` elements of type `T` into `[T; N]`.
    struct ArrayVisitor<T, const N: usize>(PhantomData<T>);

    impl<'de, T, const N: usize> Visitor<'de> for ArrayVisitor<T, N>
    where
        T: Deserialize<'de>,
    {
        type Value = [T; N];

        fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
            // write straight into the formatter, no intermediate String needed
            write!(formatter, "an array of length {}", N)
        }

        #[inline]
        fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
        where
            A: SeqAccess<'de>,
        {
            // can be optimized using MaybeUninit
            let mut data: Vec<T> = Vec::with_capacity(N);
            while data.len() < N {
                match seq.next_element()? {
                    Some(val) => data.push(val),
                    // Report how many elements were actually received; the
                    // expected length comes from `expecting`.
                    None => return Err(serde::de::Error::invalid_length(data.len(), &self)),
                }
            }
            match data.try_into() {
                Ok(arr) => Ok(arr),
                // the loop above only finishes once `data` holds exactly N elements
                Err(_) => unreachable!(),
            }
        }
    }

    /// Deserialize a tuple of exactly `N` elements into `[T; N]`.
    pub fn deserialize<'de, D, T, const N: usize>(deserializer: D) -> Result<[T; N], D::Error>
    where
        D: Deserializer<'de>,
        T: Deserialize<'de>,
    {
        deserializer.deserialize_tuple(N, ArrayVisitor::<T, N>(PhantomData))
    }
}