org_rust_parser/object/
latex_frag.rs

1use crate::constants::{
2    BACKSLASH, DOLLAR, LBRACE, LBRACK, LPAREN, NEWLINE, RBRACE, RBRACK, RPAREN,
3};
4use crate::node_pool::NodeID;
5use crate::parse::parse_element;
6use crate::types::{Cursor, MatchError, ParseOpts, Parseable, Parser, Result};
7
8use super::parse_entity;
9
/// Scans forward for a two-byte closing delimiter and allocates the fragment.
///
/// Expands to a `loop` expression that never completes normally: every path
/// out of it is a `return` (or a `?`) from the enclosing function.
///
/// Arguments:
/// - `$parser`, `$parent`: forwarded to `parse_element` and `alloc`.
/// - `$cursor`: mutable cursor, expected to sit just past the opening delimiter.
/// - `$start`: index of the first byte of the opening delimiter; `$start + 2`
///   is handed to `clamp_backwards` to obtain the fragment body
///   (NOTE(review): this assumes a two-byte opener - true for every call site
///   in this file).
/// - `$parse_opts`: mutable options; `from_object` and `list_line` are cleared
///   before probing the line that follows a newline.
/// - `$byte_1`, `$byte_2`: the two bytes of the closing delimiter.
/// - `$type`: the `Self` variant to construct from the body.
macro_rules! double_ending {
    ($parser: ident,
     $cursor: ident,
     $start: tt,
     $parse_opts: ident,
     $parent: ident,
     $byte_1: tt, $byte_2: tt,
     $type: ident
    ) => {
        loop {
            match $cursor.try_curr()? {
                NEWLINE => {
                    $parse_opts.from_object = false;
                    $parse_opts.list_line = false;
                    // Probe what begins on the next line. Only an `InvalidLogic`
                    // rejection lets the fragment keep running across the
                    // newline; any other outcome aborts the fragment.
                    let next_line =
                        parse_element($parser, $cursor.adv_copy(1), $parent, $parse_opts);
                    if !matches!(next_line, Err(MatchError::InvalidLogic)) {
                        // The concrete error kind is irrelevant to callers here,
                        // as long as an error is produced.
                        // REVIEW: find out if it's okay to return InvalidLogic here
                        return Err(MatchError::EofError);
                    }
                }
                // First closing byte followed directly by the second one:
                // the fragment is complete.
                $byte_1 if $cursor.peek(1)? == $byte_2 => {
                    return Ok($parser.alloc(
                        Self::$type($cursor.clamp_backwards($start + 2)),
                        $start,
                        $cursor.index + 2,
                        $parent,
                    ));
                }
                _ => {}
            }
            // Every non-returning case consumes exactly one byte.
            $cursor.next();
        }
    };
}
51
/// A LaTeX fragment found inside Org text.
///
/// All string slices borrow from the parsed source for `'a`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LatexFragment<'a> {
    /// A backslash command such as `\name`, `\name{contents}` or `\name[contents]`.
    Command {
        /// The command name, without the leading backslash.
        name: &'a str,
        /// The text between the braces/brackets, or `None` when the command
        /// has no argument.
        contents: Option<&'a str>,
    },
    /// Display math: the body of `$$...$$` or `\[...\]`, delimiters excluded.
    Display(&'a str),
    /// Inline math: the body of `$...$` or `\(...\)`, delimiters excluded.
    Inline(&'a str),
}
61
62impl<'a> Parseable<'a> for LatexFragment<'a> {
63    fn parse(
64        parser: &mut Parser<'a>,
65        mut cursor: Cursor<'a>,
66        parent: Option<NodeID>,
67        mut parse_opts: ParseOpts,
68    ) -> Result<NodeID> {
69        let start = cursor.index;
70        parse_opts.from_paragraph = true;
71        // figure out which fragment we have
72        if cursor.curr() == DOLLAR {
73            if cursor.peek(1)? == DOLLAR {
74                cursor.index += 2;
75                double_ending!(
76                    parser, cursor, start, parse_opts, parent, DOLLAR, DOLLAR, Display
77                )
78            } else if cursor.peek(2)? == DOLLAR && verify_single_char_latex_frag(cursor) {
79                return Ok(parser.alloc(
80                    Self::Inline(cursor.clamp(cursor.index + 1, cursor.index + 2)),
81                    start,
82                    cursor.index + 3,
83                    parent,
84                ));
85            } else if verify_latex_frag(cursor, false) {
86                cursor.next();
87                loop {
88                    match cursor.try_curr()? {
89                        NEWLINE => {
90                            // the error we return doesn't matter, as long as we error
91                            parse_opts.from_object = false;
92                            parse_opts.list_line = false;
93                            if let Err(MatchError::InvalidLogic) =
94                                parse_element(parser, cursor.adv_copy(1), parent, parse_opts)
95                            {
96                                cursor.next();
97                            } else {
98                                // just blow up REVIEW: find out if it's okay to return InvalidLogic here
99                                return Err(MatchError::EofError);
100                            }
101                        }
102                        DOLLAR => {
103                            if verify_latex_frag(cursor, true) {
104                                return Ok(parser.alloc(
105                                    Self::Inline(cursor.clamp_backwards(start + 1)),
106                                    start,
107                                    cursor.index + 1,
108                                    parent,
109                                ));
110                            } else {
111                                cursor.next();
112                            }
113                        }
114                        _ => cursor.next(),
115                    }
116                }
117            } else {
118                return Err(MatchError::InvalidLogic);
119            }
120        } else if cursor.curr() == BACKSLASH {
121            cursor.next();
122            match cursor.try_curr()? {
123                LPAREN => {
124                    cursor.next();
125                    double_ending!(
126                        parser, cursor, start, parse_opts, parent, BACKSLASH, RPAREN, Inline
127                    )
128                }
129                LBRACK => {
130                    cursor.next();
131                    double_ending!(
132                        parser, cursor, start, parse_opts, parent, BACKSLASH, RBRACK, Display
133                    )
134                }
135                chr if chr.is_ascii_alphabetic() => {
136                    let name_match = cursor.fn_until(|chr| {
137                        !chr.is_ascii_alphabetic()
138                            || chr.is_ascii_whitespace()
139                            || chr == LBRACE
140                            || chr == LBRACK
141                    });
142
143                    let prev_name_ind = cursor.index;
144                    cursor.index = if let Ok(name) = name_match {
145                        name.end
146                    } else {
147                        cursor.len()
148                    };
149                    let end_name_ind = cursor.index;
150                    let name = cursor.clamp(prev_name_ind, end_name_ind);
151
152                    // TODO stop doing everything in LatexFrag
153                    if let Ok(entity) = parse_entity(name) {
154                        return Ok(parser.alloc(entity, start, end_name_ind, parent));
155                    }
156
157                    match cursor.try_curr()? {
158                        LBRACE => {
159                            cursor.next();
160                            loop {
161                                match cursor.try_curr()? {
162                                    NEWLINE | LBRACE => {
163                                        return Err(MatchError::InvalidLogic);
164                                    }
165                                    RBRACE => {
166                                        return Ok(parser.alloc(
167                                            Self::Command {
168                                                name,
169                                                contents: Some(
170                                                    cursor.clamp_backwards(end_name_ind + 1),
171                                                ),
172                                            },
173                                            start,
174                                            cursor.index + 1,
175                                            parent,
176                                        ));
177                                    }
178                                    _ => {}
179                                }
180                                cursor.next();
181                            }
182                        }
183                        LBRACK => {
184                            cursor.next();
185                            loop {
186                                match cursor.try_curr()? {
187                                    NEWLINE | LBRACE | LBRACK | RBRACE => {
188                                        return Err(MatchError::InvalidLogic);
189                                    }
190                                    RBRACK => {
191                                        return Ok(parser.alloc(
192                                            Self::Command {
193                                                name,
194                                                contents: Some(
195                                                    cursor.clamp_backwards(end_name_ind + 1),
196                                                ),
197                                            },
198                                            start,
199                                            cursor.index + 1,
200                                            parent,
201                                        ));
202                                    }
203                                    _ => {}
204                                }
205                                cursor.next();
206                            }
207                        }
208                        _ => {
209                            return Ok(parser.alloc(
210                                Self::Command {
211                                    name,
212                                    contents: None,
213                                },
214                                start,
215                                cursor.index,
216                                parent,
217                            ));
218                        }
219                    }
220                }
221                _ => {
222                    return Err(MatchError::InvalidLogic);
223                }
224            }
225        } else {
226            return Err(MatchError::InvalidLogic);
227        }
228    }
229}
230
231fn verify_latex_frag(cursor: Cursor, post: bool) -> bool {
232    let before_maybe = cursor.peek_rev(1);
233    let after_maybe = cursor.peek(1);
234
235    if post {
236        let before_val = before_maybe.unwrap();
237        // if we're in post, then a character before the markup Must Exist
238        (!before_val.is_ascii_whitespace() && !matches!(before_val, b'.' | b',' | b'$'))
239            && if let Ok(after) = after_maybe {
240                after.is_ascii_punctuation() || after.is_ascii_whitespace()
241            } else {
242                // no after => valid
243                true
244            }
245    } else if let Ok(after) = after_maybe {
246        !after.is_ascii_whitespace()
247            && !matches!(after, b'.' | b',' | b';' | b'$')
248            && if let Ok(val) = before_maybe {
249                val != DOLLAR
250            } else {
251                // bof is valid
252                true
253            }
254    } else {
255        // if there's no after, cannot be valid markup
256        false
257    }
258}
259
260fn verify_single_char_latex_frag(cursor: Cursor) -> bool {
261    // distances:
262    // 10123
263    // p$i$c
264    //
265    // we are at the dollar
266
267    // handle access this way in case of underflow
268    let pre = cursor.peek_rev(1);
269    // pretty much never going to overflow
270    let post = cursor.peek(3);
271
272    let Ok(inner) = cursor.peek(1) else {
273        return false;
274    };
275
276    !(inner.is_ascii_whitespace() || matches!(inner, b'.' | b',' | b'?' | b';' | b'"'))
277        // both could be dne
278        && if let Ok(after) = post {
279            after.is_ascii_punctuation() || after.is_ascii_whitespace()
280        } else {
281            true
282        }
283        && if let Ok(before) = pre {
284            before != DOLLAR
285        } else {
286            true
287        }
288}
289
#[cfg(test)]
mod tests {
    use crate::{expr_in_pool, object::LatexFragment, parse_org, types::Expr};
    use pretty_assertions::assert_eq;

    /// `\(...\)` yields an inline fragment holding the body.
    #[test]
    fn basic_latex_frag() {
        let source = r"\(abc\)";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment).unwrap();

        assert_eq!(frag, &LatexFragment::Inline("abc"));
    }

    /// `\[...\]` yields a display fragment holding the body.
    #[test]
    fn latex_frag_display() {
        let source = r"\[abc\]";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment).unwrap();

        assert_eq!(frag, &LatexFragment::Display("abc"));
    }

    /// `$$...$$` yields a display fragment holding the body.
    #[test]
    fn latex_frag_display_dollars() {
        let source = r"$$abc$$";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment).unwrap();

        assert_eq!(frag, &LatexFragment::Display("abc"));
    }

    /// `$...$` yields an inline fragment holding the body.
    #[test]
    fn latex_frag_inline_dollar() {
        let source = r"$abc$";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment).unwrap();

        assert_eq!(frag, &LatexFragment::Inline("abc"));
    }

    /// A one-character body between single dollars is an inline fragment.
    #[test]
    fn latex_frag_char_inline_dollar() {
        let source = r"$c$";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment).unwrap();

        assert_eq!(frag, &LatexFragment::Inline("c"));
    }

    /// A lone `,` between single dollars must not be recognised as a
    /// fragment (in particular, not as `Inline(",")`).
    #[test]
    fn latex_frag_char_inline_dollar_invalid() {
        let source = r"$,$";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment);

        assert!(frag.is_none());
    }

    /// `\name{...}` yields a command with the braced text as contents.
    #[test]
    fn latex_frag_command_1() {
        let source = r"\command{swag}";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment).unwrap();

        let expected = LatexFragment::Command {
            name: "command",
            contents: Some("swag"),
        };
        assert_eq!(frag, &expected);
    }

    /// `\name[...]` yields a command with the bracketed text as contents.
    #[test]
    fn latex_frag_command_2() {
        let source = r"\command[swag]";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment).unwrap();

        let expected = LatexFragment::Command {
            name: "command",
            contents: Some("swag"),
        };
        assert_eq!(frag, &expected);
    }

    /// A bare `\name` followed by ordinary text is a command without contents.
    #[test]
    fn latex_frag_command_3() {
        let source = r"\command no command!";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment).unwrap();

        let expected = LatexFragment::Command {
            name: "command",
            contents: None,
        };
        assert_eq!(frag, &expected);
    }

    /// Smoke test: one backslash + invalid char => not a command!
    /// Only checks that parsing and printing complete.
    #[test]
    fn latex_frag_command_4() {
        let source = r"\) not a command";

        let pool = parse_org(source);

        pool.print_tree();
    }

    /// A blank line between single-dollar delimiters must prevent a fragment
    /// from being produced (in particular, not `Inline("ab\n\nc")`).
    #[test]
    fn latex_frag_newline() {
        let source = r"$ab

c$";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment);

        assert!(frag.is_none());
    }

    /// A blank line between `\(` and `\)` must prevent a fragment from being
    /// produced (in particular, not `Inline("ab\n\nc")`).
    #[test]
    fn latex_frag_newline_2() {
        let source = r"\(ab

c$\)";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment);

        assert!(frag.is_none());
    }

    /// Single newlines (no blank line) inside `\(...\)` are kept in the body.
    #[test]
    fn latex_frag_newline_3() {
        let source = r"\(ab
c
con
t
ent
$\)";

        let pool = parse_org(source);
        let frag = expr_in_pool!(pool, LatexFragment).unwrap();

        assert_eq!(frag, &LatexFragment::Inline("ab\nc\ncon\nt\nent\n$"));
    }

    /// Smoke test mixing every fragment form; only checks that parsing and
    /// printing complete.
    #[test]
    fn latex_frag_all() {
        let source = r"
$\alpha$ $$do
llar$$
\[display
 block\] \(consecutive gaming\)

\command

\comma
and

\command{ab
c}

";

        let pool = parse_org(source);

        pool.print_tree();
    }

    /// Smoke test: an unterminated `$` preceded by plain text; only checks
    /// that parsing, debug-formatting and printing complete.
    #[test]
    fn latex_frag_pretext() {
        let source = "one two $three\nfourfive";

        let pool = parse_org(source);

        dbg!(&pool);
        pool.print_tree();
    }

    /// A backslash plus a single letter at end of input stays plain text.
    #[test]
    fn single_backslash_char_eof() {
        let source = r"   \s";

        let pool = parse_org(source);
        let plain = expr_in_pool!(pool, Plain).unwrap();

        assert_eq!(plain, &r"\s");
    }
}
483        assert_eq!(item, &r"\s");
484    }
485}