org_rust_parser/object/
sup_sub.rs

1use crate::constants::{BACKSLASH, COMMA, LBRACE, PERIOD, STAR};
2use crate::node_pool::NodeID;
3use crate::parse::parse_object;
4use crate::types::{Cursor, MarkupKind, MatchError, ParseOpts, Parseable, Parser, Result};
5
6/// Handle superscript and subscript
7
8#[derive(Clone, Debug)]
9pub enum PlainOrRec<'a> {
10    Plain(&'a str),
11    Rec(Vec<NodeID>),
12}
13
14macro_rules! parse_nscript {
15    ($name: ident) => {
16        #[derive(Clone, Debug)]
17        pub struct $name<'a>(pub PlainOrRec<'a>);
18
19        impl<'a> Parseable<'a> for $name<'a> {
20            fn parse(
21                parser: &mut Parser<'a>,
22                mut cursor: Cursor<'a>,
23                parent: Option<NodeID>,
24                mut parse_opts: ParseOpts,
25            ) -> Result<NodeID> {
26                if cursor.peek_rev(1)?.is_ascii_whitespace() {
27                    return Err(MatchError::InvalidLogic);
28                }
29                let start = cursor.index;
30                // skip ^ or _
31                cursor.next();
32
33                match cursor.try_curr()? {
34                    LBRACE => {
35                        cursor.next();
36
37                        parse_opts.markup.insert(MarkupKind::SupSub);
38                        let mut content_vec = Vec::new();
39
40                        loop {
41                            match parse_object(parser, cursor, parent, parse_opts) {
42                                Ok(id) => {
43                                    cursor.index = parser.pool[id].end;
44                                    content_vec.push(id);
45                                }
46                                Err(MatchError::MarkupEnd(kind)) => {
47                                    if !kind.contains(MarkupKind::SupSub) {
48                                        return Err(MatchError::InvalidLogic);
49                                    }
50
51                                    let new_id = parser.pool.reserve_id();
52                                    for id in content_vec.iter_mut() {
53                                        parser.pool[*id].parent = Some(new_id)
54                                    }
55
56                                    return Ok(parser.alloc_with_id(
57                                        Self(PlainOrRec::Rec(content_vec)),
58                                        start,
59                                        cursor.index + 1,
60                                        parent,
61                                        new_id,
62                                    ));
63                                }
64                                ret @ Err(_) => {
65                                    return ret;
66                                }
67                            }
68                        }
69                    }
70                    STAR => {
71                        return Ok(parser.alloc(
72                            Superscript(PlainOrRec::Plain(cursor.clamp_forwards(cursor.index + 2))),
73                            start,
74                            cursor.index + 2,
75                            parent,
76                        ));
77                    }
78                    chr if !chr.is_ascii_whitespace() => {
79                        // SIGN
80                        //     Either a plus sign character (+), a minus sign character (-), or the empty string.
81                        // CHARS
82                        //     Either the empty string, or a string consisting of any number of alphanumeric characters,
83                        //     commas, backslashes, and dots.
84                        // FINAL
85                        //     An alphanumeric character.
86
87                        //     all this is saying is that it has to be: alphanumeric,comma,backslash,dots.
88                        //     i don't see why you wouldn't just allow anything.
89
90                        let ret = cursor.fn_while(|chr: u8| {
91                            !chr.is_ascii_whitespace()
92                                && (chr.is_ascii_alphanumeric()
93                                    || chr == COMMA
94                                    || chr == BACKSLASH
95                                    || chr == PERIOD)
96                        })?;
97
98                        cursor.move_to(ret.end);
99
100                        // we won't go back to the start of the file since
101                        // we know we started on an alphanumeric
102                        while !cursor.peek_rev(1)?.is_ascii_alphanumeric() {
103                            cursor.prev();
104                        }
105
106                        if cursor.index <= ret.start {
107                            return Err(MatchError::InvalidLogic);
108                        }
109
110                        return Ok(parser.alloc(
111                            Self(PlainOrRec::Plain(cursor.clamp_backwards(ret.start))),
112                            start,
113                            cursor.index,
114                            parent,
115                        ));
116                    }
117                    _ => return Err(MatchError::InvalidLogic)?,
118                }
119            }
120        }
121    };
122}
123
124parse_nscript!(Subscript);
125parse_nscript!(Superscript);
126
#[cfg(test)]
mod tests {
    use crate::parse_org;

    /// Smoke test: a braced superscript parses without panicking.
    /// The resulting tree is only printed, not asserted on.
    #[test]
    fn basic_sup() {
        let pool = parse_org(r"a^{\smiley}");
        pool.print_tree();
    }
}