org_rust_parser/object/sup_sub.rs
1use crate::constants::{BACKSLASH, COMMA, LBRACE, PERIOD, STAR};
2use crate::node_pool::NodeID;
3use crate::parse::parse_object;
4use crate::types::{Cursor, MarkupKind, MatchError, ParseOpts, Parseable, Parser, Result};
5
6/// Handle superscript and subscript
7
8#[derive(Clone, Debug)]
9pub enum PlainOrRec<'a> {
10 Plain(&'a str),
11 Rec(Vec<NodeID>),
12}
13
/// Generates a script node type named `$name` that wraps a [`PlainOrRec`]
/// and implements [`Parseable`] for it.
///
/// The same template is expanded for both superscripts and subscripts: the
/// parser body never inspects the marker character itself, it only skips it.
macro_rules! parse_nscript {
    ($name: ident) => {
        /// Script node holding either a plain text slice or parsed children.
        #[derive(Clone, Debug)]
        pub struct $name<'a>(pub PlainOrRec<'a>);

        impl<'a> Parseable<'a> for $name<'a> {
            /// Parses a script object whose marker (`^` or `_`) is under `cursor`.
            ///
            /// Three body forms are recognised after the marker:
            /// - `{ ... }`: objects are parsed recursively until the `SupSub`
            ///   markup end is signalled; the node stores the child ids.
            /// - `*`: stored as plain text.
            /// - a run of alphanumerics, commas, backslashes and dots that is
            ///   trimmed back so it ends on an alphanumeric; stored as plain text.
            ///
            /// # Errors
            ///
            /// Returns `MatchError::InvalidLogic` when the byte before the marker
            /// is ASCII whitespace, when the body starts with whitespace, when a
            /// braced body is terminated by a markup end other than `SupSub`, or
            /// when no alphanumeric remains after trimming an unbraced body.
            /// Errors from cursor look-ups and from `parse_object` are propagated.
            fn parse(
                parser: &mut Parser<'a>,
                mut cursor: Cursor<'a>,
                parent: Option<NodeID>,
                mut parse_opts: ParseOpts,
            ) -> Result<NodeID> {
                // A script must directly follow a non-whitespace byte.
                if cursor.peek_rev(1)?.is_ascii_whitespace() {
                    return Err(MatchError::InvalidLogic);
                }
                let start = cursor.index;
                // skip ^ or _
                cursor.next();

                match cursor.try_curr()? {
                    LBRACE => {
                        cursor.next();

                        // Mark that we are inside a script so the closing brace
                        // is reported back to us as a `SupSub` markup end.
                        parse_opts.markup.insert(MarkupKind::SupSub);
                        let mut content_vec = Vec::new();

                        loop {
                            match parse_object(parser, cursor, parent, parse_opts) {
                                Ok(id) => {
                                    cursor.index = parser.pool[id].end;
                                    content_vec.push(id);
                                }
                                Err(MatchError::MarkupEnd(kind)) => {
                                    if !kind.contains(MarkupKind::SupSub) {
                                        return Err(MatchError::InvalidLogic);
                                    }

                                    // The children were parsed before this node
                                    // had an id, so re-parent them now.
                                    let new_id = parser.pool.reserve_id();
                                    for id in &content_vec {
                                        parser.pool[*id].parent = Some(new_id);
                                    }

                                    // `+ 1` steps past the closing brace.
                                    return Ok(parser.alloc_with_id(
                                        Self(PlainOrRec::Rec(content_vec)),
                                        start,
                                        cursor.index + 1,
                                        parent,
                                        new_id,
                                    ));
                                }
                                ret @ Err(_) => {
                                    return ret;
                                }
                            }
                        }
                    }
                    // Build `Self` rather than a fixed node type so that the
                    // subscript expansion does not allocate a superscript node.
                    //
                    // NOTE(review): the cursor sits on `*` here, yet both the
                    // slice bound and the node end use `index + 2`, unlike the
                    // `index + 1` used to step past `}` above. Confirm whether
                    // `+ 1` was intended.
                    STAR => Ok(parser.alloc(
                        Self(PlainOrRec::Plain(cursor.clamp_forwards(cursor.index + 2))),
                        start,
                        cursor.index + 2,
                        parent,
                    )),
                    chr if !chr.is_ascii_whitespace() => {
                        // SIGN
                        // Either a plus sign character (+), a minus sign character (-), or the empty string.
                        // CHARS
                        // Either the empty string, or a string consisting of any number of alphanumeric characters,
                        // commas, backslashes, and dots.
                        // FINAL
                        // An alphanumeric character.

                        // In practice: accept alphanumerics, commas, backslashes
                        // and dots, then require the run to end on an alphanumeric.
                        let ret = cursor.fn_while(|chr: u8| {
                            !chr.is_ascii_whitespace()
                                && (chr.is_ascii_alphanumeric()
                                    || chr == COMMA
                                    || chr == BACKSLASH
                                    || chr == PERIOD)
                        })?;

                        cursor.move_to(ret.end);

                        // Trim trailing non-alphanumerics so the script ends on
                        // FINAL. A failed look-behind is propagated by `?`.
                        while !cursor.peek_rev(1)?.is_ascii_alphanumeric() {
                            cursor.prev();
                        }

                        // Nothing alphanumeric was left in the matched run.
                        if cursor.index <= ret.start {
                            return Err(MatchError::InvalidLogic);
                        }

                        Ok(parser.alloc(
                            Self(PlainOrRec::Plain(cursor.clamp_backwards(ret.start))),
                            start,
                            cursor.index,
                            parent,
                        ))
                    }
                    // Whitespace right after the marker: not a script.
                    _ => Err(MatchError::InvalidLogic),
                }
            }
        }
    };
}
123
124parse_nscript!(Subscript);
125parse_nscript!(Superscript);
126
#[cfg(test)]
mod tests {
    use crate::parse_org;

    /// Smoke test for a braced superscript: the input is parsed and the
    /// resulting tree printed. There are no assertions, so the test only
    /// fails if parsing or printing panics.
    #[test]
    fn basic_sup() {
        parse_org(r"a^{\smiley}").print_tree();
    }
}