elm_parser/
element_text.rs

1use crate::datacell::{BlockChildType::*, CellTrait::Cell, Datacell::*, ElementCell::*};
2
3pub struct ElementText {
4    pub text: String,
5    rules: Vec<DelimeterRules>,
6}
7
/// Describes one delimiter pair and how the text between the pair is rewritten.
///
/// `Default` yields empty symbols/replacements, all flags `false` and empty
/// ignore lists, so rules can be written with `..Default::default()`.
#[derive(Debug, Default)]
struct DelimeterRules {
    /// Opening marker searched for in the text (e.g. `"*"` or `"$$"`).
    symbol: &'static str,
    /// Closing marker that ends the delimited content.
    end_symbol: &'static str,
    /// Text emitted in place of the opening marker.
    left_replacement: &'static str,
    /// Text emitted in place of the closing marker.
    right_replacement: &'static str,
    /// When `true`, a delimited span that is directly followed by a non-space
    /// character is wrapped, together with the adjacent word parts, in a
    /// `<span class="nobreak">` element.
    no_break: bool,
    /// When `true`, the opening and closing markers are re-emitted around the content.
    keep_delimiter: bool,
    /// When `true`, the delimited content is copied verbatim instead of being
    /// scanned again for nested delimiters.
    ignore_nested_delimeters: bool,
    /// When `true`, the backslash in front of an escaped closing marker is kept
    /// in the delimited content.
    keep_escaped_char_when_closed: bool,
    /// Characters that prevent an opening marker from being recognised when they
    /// follow it (populated only for single-character symbols in this file).
    ignore_when_before: Vec<char>,
    /// Characters that prevent a closing marker from being recognised when they
    /// immediately precede it.
    ignore_when_after: Vec<char>,
}
38
39impl ElementText {
40    pub fn new(text: &str) -> ElementText {
41        ElementText {
42            text: text.to_string(),
43            rules: vec![
44                DelimeterRules {
45                    symbol: "*",
46                    end_symbol: "*",
47                    left_replacement: "<Span bold=true>",
48                    right_replacement: "</Span>",
49                    ignore_when_after: vec!['(', '[', '{', '*', ' '],
50                    ignore_when_before: vec![')', ']', '}', '*', ' '],
51                    ..Default::default()
52                },
53                DelimeterRules {
54                    symbol: "__",
55                    end_symbol: "__",
56                    left_replacement: "<Span italic=true align=Align::Center>",
57                    right_replacement: "</Span>",
58                    ..Default::default()
59                },
60                DelimeterRules {
61                    symbol: "_|",
62                    end_symbol: "|_",
63                    left_replacement: "<Span align=Align::Center>",
64                    right_replacement: "</Span>",
65                    ..Default::default()
66                },
67                DelimeterRules {
68                    symbol: "_",
69                    end_symbol: "_",
70                    left_replacement: "<Span italic=true>",
71                    right_replacement: "</Span>",
72                    ignore_when_after: vec!['(', '[', '{', ' '],
73                    ignore_when_before: vec![')', ']', '}', ' '],
74                    ..Default::default()
75                },
76                DelimeterRules {
77                    symbol: "$$",
78                    end_symbol: "$$",
79                    left_replacement: "<MathBlock>",
80                    right_replacement: "</MathBlock>",
81                    no_break: true,
82                    keep_delimiter: true,
83                    ignore_nested_delimeters: true,
84                    ..Default::default()
85                },
86                DelimeterRules {
87                    symbol: "$",
88                    end_symbol: "$",
89                    left_replacement: "<Math>",
90                    right_replacement: "</Math>",
91                    no_break: true,
92                    keep_delimiter: true,
93                    ignore_nested_delimeters: true,
94                    keep_escaped_char_when_closed: true, // Ex: $ ex \$ $ --> <Math>$ ex \$ $</Math> instead of <Math>$ ex $ $</Math>
95                    ..Default::default()
96                },
97            ],
98        }
99    }
100
101    pub fn handle_delimeters(&self) -> String {
102        let mut i = 0;
103        let mut output = String::new();
104
105        while i <= self.text.len() {
106            let (del, skips, text) = &self.find_next_delimeter(i, false);
107
108            output.push_str(text);
109
110            if del.is_none() {
111                break;
112            }
113
114            i = *skips;
115
116            if i <= self.text.len() {
117                let (found, closing_index, del_content) =
118                    &self.find_closing_delimeter(i, &del.unwrap(), false);
119
120                if !found {
121                    // closing del not found , we push the symbol as normal text and continue
122
123                    output.push_str(&del.unwrap().symbol);
124                    let nested_content = ElementText::new(&del_content).handle_delimeters();
125
126                    output.push_str(nested_content.as_str());
127                    i = *closing_index + 1;
128                    continue;
129                }
130
131                let nested_content = if !del.unwrap().ignore_nested_delimeters {
132                    ElementText::new(&del_content).handle_delimeters()
133                } else {
134                    del_content.to_string()
135                };
136
137                if i <= self.text.len() {
138                    i = closing_index + del.unwrap().end_symbol.len();
139                    let mut char_after_closing_del = "";
140                    if i <= self.text.len() && del.unwrap().no_break {
141                        char_after_closing_del = &self.get_slice(i, i + 1).unwrap_or("");
142                    }
143
144                    if char_after_closing_del != " "
145                        && char_after_closing_del != ""
146                        && del.unwrap().no_break
147                    {
148                        // remove prev chars until we hit a space
149                        let mut removed = String::new();
150                        while output.len() > 0 && output.chars().last().unwrap() != ' ' {
151                            // replace first char of removed
152                            removed = format!("{}{}", output.pop().unwrap(), removed);
153                        }
154                        output.push_str("\"#<span class=\"nobreak\">r#\"");
155                        output.push_str(removed.as_str());
156                        output.push_str("\"#");
157                    } else {
158                        output.push_str("\"#");
159                    }
160                    output.push_str(del.unwrap().left_replacement);
161                    output.push_str("r#\"");
162
163                    if del.unwrap().keep_delimiter {
164                        output.push_str(&del.unwrap().symbol);
165                    }
166
167                    output.push_str(&nested_content);
168
169                    if del.unwrap().keep_delimiter {
170                        output.push_str(&del.unwrap().end_symbol);
171                    }
172                    output.push_str("\"#");
173                    output.push_str(del.unwrap().right_replacement);
174                    if del.unwrap().no_break
175                        && char_after_closing_del != " "
176                        && char_after_closing_del != ""
177                    {
178                        output.push_str("r#\"");
179                        let mut string = "".to_string();
180                        while i < self.text.len()
181                            && self.get_char(i) != " "
182                            && self.get_char(i) != ""
183                        {
184                            string.push_str(self.get_char(i).as_str());
185                            i += 1;
186                        }
187                        let handled_string = self::ElementText::new(&string).handle_delimeters();
188                        i += 1;
189                        output.push_str(&handled_string);
190                        output.push_str("\"#</span>r#\"");
191                    } else {
192                        output.push_str("r#\"");
193                        i += 1;
194                    }
195                }
196            }
197        }
198        output
199    }
200
201    pub fn split_text(&self) -> Vec<BlockChildType> {
202        let mut i = 0;
203        let mut output = Vec::<BlockChildType>::new();
204        while i <= self.text.len() {
205            let (del, skips, text) = &self.find_next_delimeter(i, true);
206
207            output.push(BlockChildType::Text(TextCell {
208                content: text.to_string(),
209                wrapped_with: None,
210            }));
211            if !del.is_some() {
212                break;
213            }
214
215            i = *skips;
216
217            if i <= self.text.len() {
218                let (found, closing_index, del_content) =
219                    &self.find_closing_delimeter(i, &del.unwrap(), true);
220
221                if !found {
222                    // closing del not found , we push the symbol as normal text and continue
223
224                    let last_child = output.pop().unwrap();
225                    match last_child {
226                        BlockChildType::Text(mut t) => {
227                            t.content
228                                .push_str(&format!("{}{}", &del.unwrap().symbol, del_content));
229                            output.push(BlockChildType::Text(t))
230                        }
231                        _ => (),
232                    }
233                    i = *closing_index + 1;
234
235                    continue;
236                }
237
238                if i <= self.text.len() {
239                    i = closing_index + del.unwrap().end_symbol.len();
240
241                    output.push(BlockChildType::Delimited(DelimitedCell {
242                        terminal: del_content.to_owned(),
243                        open_delimeter: del.unwrap().symbol.to_string(),
244                        close_delimeter: del.unwrap().end_symbol.to_string(),
245                        display_type: if del.unwrap().symbol.len() > 1 {
246                            DelimitedDisplayType::BLOCK
247                        } else {
248                            DelimitedDisplayType::INLINE
249                        },
250                        wrapped_with: None,
251                    }));
252
253                    i += 1;
254                }
255            }
256        }
257        output
258    }
259
260    pub fn remove_escapes(&mut self) -> &Self {
261        let mut output = String::new();
262        let mut i = 0;
263        let symbols = self.get_all_symbols();
264        let text = &self.text;
265
266        // remove escape char if it's before a delimiter symbol
267        while i + 1 < text.len() {
268            if self.get_char(i) != "\\" {
269                output.push_str(&self.get_char(i));
270                i += 1;
271                continue;
272            }
273
274            let mut delimiter_escaped = false;
275            symbols.iter().any(|s| {
276                if self.get_char(i + s.len()) == *s {
277                    output.push_str(s);
278                    i += s.len() + 1;
279                    delimiter_escaped = true;
280                    return true;
281                }
282                false
283            });
284            if !delimiter_escaped {
285                output.push_str(&self.get_char(i));
286                i += 1
287            }
288        }
289        self.text = output;
290        self
291    }
292
293    fn find_next_delimeter(
294        &self,
295        mut i: usize,
296        keep_escape_char: bool,
297    ) -> (Option<&DelimeterRules>, usize, String) {
298        let mut found_symbol = "";
299        let mut del: Option<&DelimeterRules> = None;
300        let mut text = "".to_string();
301        let symbols: Vec<&str> = self.rules.iter().map(|d| d.symbol).collect();
302        let mut has_multi_char = false;
303
304        while i <= self.text.len() {
305            let _del = self.rules.iter().find(|d| {
306                if i.checked_sub(d.symbol.len()).is_some() {
307                    d.symbol
308                        == &self
309                            .text
310                            .chars()
311                            .take(i)
312                            .skip(i - d.symbol.len())
313                            .collect::<String>()
314                        && self.text.chars().nth(i - d.symbol.len() + 1).is_some()
315                        && !d
316                            .ignore_when_before
317                            .contains(&self.text.chars().nth(i - d.symbol.len() + 1).unwrap())
318                } else {
319                    false
320                }
321            });
322
323            if _del.is_some() {
324                if symbols.contains(
325                    // in case founded delimiter is same as another delimeter first char e.g $ and $$
326                    &self
327                        .text
328                        .chars()
329                        .take(i + 1)
330                        .skip(i - _del.unwrap().symbol.len())
331                        .collect::<String>()
332                        .as_str(),
333                ) {
334                    i += 1;
335                    has_multi_char = true;
336                    continue;
337                }
338                del = _del;
339                found_symbol = _del.unwrap().symbol;
340                if !has_multi_char {}
341                if self.is_escaped(i - found_symbol.len()) {
342                    let mut nth = text.chars().nth(i);
343                    while nth.is_some() && nth.unwrap() != '\\' {
344                        text.pop();
345                        nth = text.chars().nth(i);
346                    }
347
348                    if !keep_escape_char {
349                        // pop the "\"
350                        text.pop();
351                    }
352
353                    text.push_str(found_symbol); // push again without "\"
354                    i += 1;
355
356                    continue;
357                }
358                break;
359            }
360
361            if i.checked_sub(1).is_some() && !(i == 1 && &self.get_char(i - 1) == " ") {
362                text.push_str(&self.get_char(i - 1));
363            }
364            i += 1;
365        }
366
367        (del, i, text)
368    }
369
370    fn find_closing_delimeter(
371        &self,
372        mut i: usize,
373        found_del: &DelimeterRules,
374        keep_escape_char: bool,
375    ) -> (bool, usize, String) {
376        let end_symbol = found_del.end_symbol;
377        let mut found = false;
378        let mut del_content = "".to_string();
379        let symbols: Vec<&str> = self.rules.iter().map(|d| d.symbol).collect();
380
381        while i < self.text.len() {
382            let is_found = end_symbol
383                == &self
384                    .text
385                    .chars()
386                    .take(i + end_symbol.len())
387                    .skip(i)
388                    .collect::<String>()
389                && !found_del
390                    .ignore_when_after
391                    .contains(&self.text.chars().nth(i - 1).unwrap());
392
393            if !is_found {
394                if (end_symbol
395                    != &self
396                        .text
397                        .chars()
398                        .take(i + end_symbol.len())
399                        .skip(i)
400                        .collect::<String>()
401                    && !self.escape_before_symbol(i, &end_symbol))
402                    || found_del
403                        .ignore_when_after
404                        .contains(&self.text.chars().nth(i - 1).unwrap())
405                {
406                    del_content.push_str(&self.get_char(i));
407                }
408                i += 1;
409
410                continue;
411            }
412            let next_char = &self.text.chars().take(i + 2).skip(i).collect::<String>();
413            if symbols.contains(
414                // in case founded delimiter is same as another delimeter first char e.g $ and $$
415                // this is solution for this case $ xx $$ , we don't want xx $ to be considered as text inside $ $ ,
416                &next_char.as_str(),
417            ) && next_char != &end_symbol
418            {
419                i += 2;
420                del_content.push_str(next_char);
421
422                continue;
423            }
424            found = true;
425            if self.is_escaped(i) {
426                if keep_escape_char || found_del.keep_escaped_char_when_closed {
427                    del_content.push_str("\\");
428                }
429                del_content.push_str(end_symbol);
430                found = false;
431                i += end_symbol.len();
432
433                continue;
434            }
435            break;
436        }
437
438        (found, i, del_content)
439    }
440
441    fn get_char(&self, i: usize) -> String {
442        if let Some(_char) = self.text.chars().nth(i) {
443            _char.to_string()
444        } else {
445            "".to_string()
446        }
447    }
448
449    fn is_escaped(&self, i: usize) -> bool {
450        i > 0 && self.get_char(i - 1) == "\\"
451    }
452
453    fn get_all_symbols(&self) -> Vec<String> {
454        let mut symbols: Vec<String> = self.rules.iter().map(|d| d.symbol.to_string()).collect();
455        let end_symbols: Vec<String> = self
456            .rules
457            .iter()
458            .map(|d| d.end_symbol.to_string())
459            .collect();
460        for end_s in end_symbols {
461            if !symbols.contains(&end_s) {
462                symbols.push(end_s)
463            }
464        }
465        symbols
466    }
467
468    fn escape_before_symbol(&self, i: usize, symbol: &str) -> bool {
469        i > 0
470            && self
471                .get_slice(i + 1, i + 1 + symbol.len())
472                .is_some_and(|s| self.get_all_symbols().contains(&s.to_string()))
473            && self.get_char(i) == "\\"
474    }
475
476    fn get_slice(&self, start: usize, end: usize) -> Option<&str> {
477        assert!(end >= start);
478        let s = &self.text;
479
480        let mut iter = s
481            .char_indices()
482            .map(|(pos, _)| pos)
483            .chain(Some(s.len()))
484            .skip(start)
485            .peekable();
486        let start_pos = *iter.peek()?;
487        for _ in start..end {
488            iter.next();
489        }
490
491        Some(&s[start_pos..*iter.peek()?])
492    }
493}