1use core::ops::RangeInclusive;
2
3use alloc::{collections::BTreeMap, string::String, vec::Vec};
4
5use crate::{
6 config::ReshaperConfig,
7 form::LetterForm,
8 letters::{
9 letters_db::{TATWEEL, ZWJ},
10 *,
11 },
12 ligatures::*,
13};
14
15const EMPTY: (char, LetterForm) = ('\0', LetterForm::Unsupported);
16
/// Inclusive code-point ranges whose characters are handled as harakat.
///
/// A character is a haraka when any one of these ranges contains it.
static HARAKAT_RE: [RangeInclusive<char>; 7] = [
    '\u{0610}'..='\u{061A}',
    '\u{064B}'..='\u{065F}',
    '\u{0670}'..='\u{0670}',
    '\u{06D6}'..='\u{06DC}',
    '\u{06DF}'..='\u{06E8}',
    '\u{06EA}'..='\u{06ED}',
    // U+08D4..=U+08E1, U+08D4..=U+08ED and U+08E3..=U+08FF overlap, so a
    // single contiguous span covers exactly the same characters.
    '\u{08D4}'..='\u{08FF}',
];
28
29#[derive(Default, Clone)]
32pub struct ArabicReshaper {
33 config: ReshaperConfig,
34 letters: Letters,
35}
36
37impl ArabicReshaper {
38 pub const fn new(config: ReshaperConfig) -> Self {
40 Self {
41 letters: Letters::new(config.language),
42 config,
43 }
44 }
45
46 pub fn need_reshape<S>(&self, text: S) -> bool
48 where
49 S: AsRef<str>,
50 {
51 text.as_ref().chars().any(|c| self.letters.contains_key(&c))
52 }
53
54 pub fn reshape<S>(&self, text: S) -> String
56 where
57 S: AsRef<str>,
58 {
59 let text = text.as_ref();
60
61 if text.is_empty() {
62 return String::new();
63 }
64
65 let ReshaperConfig {
66 delete_harakat,
67 shift_harakat_position,
68 delete_tatweel,
69 support_zwj,
70 use_unshaped_instead_of_isolated,
71 support_ligatures,
72 ..
73 } = self.config;
74
75 let isolated_form = match use_unshaped_instead_of_isolated {
76 true => LetterForm::Unshaped,
77 false => LetterForm::Isolated,
78 };
79
80 let mut output = Vec::new();
81 let mut position_harakat: BTreeMap<isize, Vec<char>> = BTreeMap::new();
82
83 for letter in text.chars() {
84 if HARAKAT_RE.iter().any(|h| h.contains(&letter)) {
85 if !delete_harakat {
86 let mut position = (output.len() - 1) as isize;
87 if shift_harakat_position {
88 position -= 1
89 }
90
91 let entry = position_harakat.entry(position).or_default();
92
93 if shift_harakat_position {
94 entry.insert(0, letter);
95 } else {
96 entry.push(letter);
97 }
98 }
99 } else if letter == TATWEEL && delete_tatweel || letter == ZWJ && !support_zwj {
100 } else if !self.letters.contains_key(&letter) {
101 output.push((letter, LetterForm::Unsupported))
102 } else if output.is_empty() {
103 output.push((letter, isolated_form)) } else {
105 let previous_letter = output.last_mut().unwrap();
106 if (previous_letter.1 == LetterForm::Unsupported)
107 || (!self.letters.connects_with_letter_before(letter))
108 || (!self.letters.connects_with_letter_after(previous_letter.0))
109 || (previous_letter.1 == LetterForm::Final
110 && !self
111 .letters
112 .connects_with_letters_before_and_after(previous_letter.0))
113 {
114 output.push((letter, isolated_form));
115 } else if previous_letter.1 == isolated_form {
116 *previous_letter = (previous_letter.0, LetterForm::Initial);
117 output.push((letter, LetterForm::Final));
118 } else {
119 *previous_letter = (previous_letter.0, LetterForm::Medial);
122 output.push((letter, LetterForm::Final));
123 }
124 }
125
126 let len = output.len();
128 if support_zwj && len > 1 && output[len - 2].0 == ZWJ {
129 output.remove(len - 2);
130 }
131 }
132
133 if support_zwj && !output.is_empty() && output.last().unwrap().0 == ZWJ {
134 output.pop();
135 }
136
137 if support_ligatures {
138 let mut text: String = text
140 .chars()
141 .filter(|c| !HARAKAT_RE.iter().any(|r| r.contains(c)))
142 .collect();
143
144 if delete_tatweel {
146 text = text.replace(TATWEEL, "")
147 }
148
149 for ((tmatchs, forms), enabled) in
150 LIGATURES.iter().zip(self.config.ligatures.list.iter())
151 {
152 if !enabled {
153 continue;
154 }
155 for tmatch in *tmatchs {
156 for (idx, m) in text.match_indices(tmatch) {
157 let a = text[..idx].chars().count();
160 let b = text[..idx + m.len()].chars().count();
161
162 let a_form = output[a].1;
163 let b_form = output[b - 1].1;
164 let ligature_form: LetterForm;
165
166 if a_form == isolated_form || a_form == LetterForm::Initial {
176 if b_form == isolated_form || b_form == LetterForm::Final {
177 ligature_form = LetterForm::Isolated;
178 } else {
179 ligature_form = LetterForm::Initial;
180 }
181 } else if b_form == isolated_form || b_form == LetterForm::Final {
182 ligature_form = LetterForm::Final;
183 } else {
184 ligature_form = LetterForm::Medial;
185 }
186
187 if forms.get(ligature_form) == '\0' {
188 continue;
189 }
190
191 output[a] = (forms.get(ligature_form), LetterForm::Unsupported);
192
193 for e in output[a + 1..b].iter_mut() {
194 *e = EMPTY;
195 }
196 }
197 }
198 }
199 }
200
201 let mut result = Vec::with_capacity(text.len());
202
203 if !delete_harakat {
204 if let Some(ph) = position_harakat.get(&-1) {
205 result.extend(ph);
206 }
207 }
208
209 for (i, (letter, form)) in output.into_iter().enumerate() {
210 if letter != '\0' {
211 result.push(self.letters.get_form(letter, form))
212 }
213
214 if !delete_harakat {
215 if let Some(ph) = position_harakat.get(&(i as isize)) {
216 result.extend(ph);
217 }
218 }
219 }
220
221 result.into_iter().collect()
222 }
223
224 pub fn reshape_lines<S, L>(&self, lines: L) -> Vec<String>
226 where
227 S: AsRef<str>,
228 L: AsRef<[S]>,
229 {
230 let lines = lines.as_ref();
231 let mut result = Vec::with_capacity(lines.len());
232 for line in lines {
233 result.push(self.reshape(line.as_ref()));
234 }
235 result
236 }
237
238 pub fn modify_config<F>(&mut self, func: F)
241 where
242 F: FnOnce(&mut ReshaperConfig),
243 {
244 let language_before = self.config.language;
245
246 func(&mut self.config);
247
248 if language_before != self.config.language {
249 self.letters.change_language(self.config.language);
251 }
252 }
253}
254
255impl From<ReshaperConfig> for ArabicReshaper {
256 fn from(value: ReshaperConfig) -> Self {
257 ArabicReshaper::new(value)
258 }
259}