org_rust_parser/object/
markup.rs

1use crate::constants::{EQUAL, NEWLINE, TILDE};
2use crate::node_pool::NodeID;
3use crate::parse::{parse_element, parse_object};
4use crate::types::{Cursor, MarkupKind, MatchError, ParseOpts, Parseable, Parser, Result};
5use crate::utils::verify_markup;
6
/// Generates a recursive markup object and its `Parseable` implementation.
///
/// `$name` is the name of the markup object, e.g. `Bold`. It is used both as
/// the name of the generated struct and as the `MarkupKind::$name` flag that
/// marks this markup as active while its contents are being parsed.
///
/// The generated `parse`:
/// - returns `MatchError::InvalidLogic` when `verify_markup(cursor, false)`
///   rejects the cursor position, when the markup that ended is not this one,
///   or when the markup would be empty (e.g. `**`);
/// - forwards any other error produced by `parse_object` unchanged;
/// - otherwise allocates a node spanning from the opening delimiter up to and
///   including the closing delimiter, and re-parents the collected children
///   onto it.
macro_rules! recursive_markup {
    ($name: tt) => {
        /// Recursive markup object holding the IDs of its child nodes.
        #[derive(Debug, Clone)]
        pub struct $name(pub Vec<NodeID>);

        impl<'a> Parseable<'a> for $name {
            fn parse(
                parser: &mut Parser<'a>,
                mut cursor: Cursor<'a>,
                parent: Option<NodeID>,
                mut parse_opts: ParseOpts,
            ) -> Result<NodeID> {
                if !verify_markup(cursor, false) {
                    return Err(MatchError::InvalidLogic);
                }

                // Remember where the opening delimiter sits, then step past it.
                let start = cursor.index;
                cursor.next();

                // Mark this markup as active for everything parsed inside it.
                parse_opts.from_object = false;
                parse_opts.markup.insert(MarkupKind::$name);

                let mut children: Vec<NodeID> = Vec::new();
                loop {
                    match parse_object(parser, cursor, parent, parse_opts) {
                        Ok(id) => {
                            // Continue right after the object that was just parsed.
                            cursor.index = parser.pool[id].end;
                            children.push(id);
                        }
                        Err(MatchError::MarkupEnd(kind)) => {
                            // Reject when a different markup ended, or when
                            // nothing sits between the delimiters
                            // (prevents `**` from being an empty Bold{}).
                            if !kind.contains(MarkupKind::$name) || cursor.index < start + 2 {
                                return Err(MatchError::InvalidLogic);
                            }

                            // The markup is going to exist, so point the
                            // children at the node that will hold them.
                            let new_id = parser.pool.reserve_id();
                            for id in &children {
                                parser.pool[*id].parent = Some(new_id);
                            }

                            // `cursor.index + 1` includes the closing delimiter.
                            return Ok(parser.alloc_with_id(
                                Self(children),
                                start,
                                cursor.index + 1,
                                parent,
                                new_id,
                            ));
                        }
                        Err(err) => return Err(err),
                    }
                }
            }
        }
    };
}
66
/// Generates a plain (non-recursive) markup object and its `Parseable`
/// implementation.
///
/// `$name` is the name of the markup object, e.g. `Code`.
/// `$byte` is the closing delimiter for the markup object, e.g. `TILDE`.
///
/// The generated struct wraps the string slice returned by
/// `cursor.clamp_backwards(start + 1)` once the closing delimiter is found.
///
/// The generated `parse`:
/// - returns `MatchError::InvalidLogic` when `verify_markup(cursor, false)`
///   rejects the cursor position, or when an element parses successfully
///   right after a newline inside the markup;
/// - returns `MatchError::MarkupEnd` when a byte belonging to an active
///   markup is hit that is not a valid closer for this object;
/// - propagates the error of `cursor.try_curr()` and any other error coming
///   out of `parse_element`.
macro_rules! plain_markup {
    ($name: tt, $byte: tt) => {
        /// Plain markup object holding its contents as a string slice.
        #[derive(Debug, Clone, Copy)]
        pub struct $name<'a>(pub &'a str);

        impl<'a> Parseable<'a> for $name<'a> {
            fn parse(
                parser: &mut Parser<'a>,
                mut cursor: Cursor<'a>,
                parent: Option<NodeID>,
                mut parse_opts: ParseOpts,
            ) -> Result<NodeID> {
                if !verify_markup(cursor, false) {
                    return Err(MatchError::InvalidLogic);
                }

                // Mark this markup as active so its closing byte is recognised.
                parse_opts.markup.insert(MarkupKind::$name);

                // Skip the opening character, we checked it's valid markup.
                let start = cursor.index;
                cursor.next();

                loop {
                    match cursor.try_curr()? {
                        chr if parse_opts.markup.byte_match(chr) => {
                            // We are done only when the byte is our own closer,
                            // there is at least one character of content
                            // (prevents `~~` from being an empty object), and
                            // the position passes `verify_markup(cursor, true)`.
                            let closes_self = chr == $byte
                                && cursor.index > start + 1
                                && verify_markup(cursor, true);
                            if closes_self {
                                break;
                            }

                            // FIXME: doesn't handle link end.
                            // [[___][~abc ] amc~ ]]
                            // won't make one cohesive code object, the rbrack will
                            // kill it
                            return Err(MatchError::MarkupEnd(parse_opts.markup));
                        }
                        NEWLINE => {
                            // Try to parse an element right after the newline.
                            // If one parses, this markup is rejected.
                            parse_opts.from_paragraph = true;
                            parse_opts.from_object = false;
                            parse_opts.list_line = false;
                            match parse_element(parser, cursor.adv_copy(1), parent, parse_opts) {
                                Ok(_) => return Err(MatchError::InvalidLogic),
                                // No element there: step over the newline and keep scanning.
                                Err(MatchError::InvalidLogic) => {
                                    cursor.next();
                                }
                                Err(err) => return Err(err),
                            }
                        }
                        _ => {
                            cursor.next();
                        }
                    }
                }

                // `cursor.index + 1` includes the closing delimiter.
                Ok(parser.alloc(
                    Self(cursor.clamp_backwards(start + 1)),
                    start,
                    cursor.index + 1,
                    parent,
                ))
            }
        }
    };
}
135
136recursive_markup!(Italic);
137recursive_markup!(Bold);
138recursive_markup!(StrikeThrough);
139recursive_markup!(Underline);
140
141plain_markup!(Code, TILDE);
142plain_markup!(Verbatim, EQUAL);
143
// Smoke tests: each one parses a small input and prints the result.
// They contain no assertions, so they only fail if parsing panics.
#[cfg(test)]
mod tests {
    use crate::parse_org;

    #[test]
    fn basic_verbatim() {
        let inp = "=hello_world=";

        dbg!(parse_org(inp));
    }

    #[test]
    fn basic_code() {
        let inp = "~hello_world~";

        dbg!(parse_org(inp));
    }

    #[test]
    fn basic_italic() {
        let inp = "/hello_world/";

        dbg!(parse_org(inp));
    }

    #[test]
    fn basic_bold() {
        let inp = "*hello_world*";

        dbg!(parse_org(inp));
    }

    #[test]
    fn basic_underline() {
        let inp = "_hello_world_";

        dbg!(parse_org(inp));
    }

    #[test]
    fn basic_strikethrough() {
        let inp = "+hello_world+";

        dbg!(parse_org(inp));
    }

    #[test]
    fn markup_recursive_empty() {
        // only an opening and a closing delimiter, nothing in between
        let inp = "**";

        let pool = parse_org(inp);
        pool.print_tree();
    }

    #[test]
    fn markup_plain_empty() {
        // only an opening and a closing delimiter, nothing in between
        let inp = "~~";

        let pool = parse_org(inp);
        pool.print_tree();
    }

    #[test]
    fn nested_markup() {
        let inp = "abc /one *two* three/ four";

        let pool = parse_org(inp);
        pool.print_tree();
    }

    #[test]
    fn leaky_markup() {
        // the italic closes while the bold inside it is still open
        let inp = "abc /one *two thr/ ee* three four";

        let pool = parse_org(inp);
        pool.print_tree();
    }

    #[test]
    fn mixed_plain_recursive_leaky_markup() {
        // the italic delimiter appears while the code inside it is still open
        let inp = "abc /one ~two thr/ ee~ three four";

        let pool = parse_org(inp);
        pool.print_tree();
    }

    #[test]
    fn markup_not_fail_on_eof() {
        // a lone opening delimiter followed by end of input
        let inp = "/";
        let pool = parse_org(inp);

        pool.print_tree();
    }

    #[test]
    fn markup_plain_single_char() {
        // should be valid
        let inp = "~a~";
        let pool = parse_org(inp);

        pool.print_tree();
    }

    #[test]
    fn markup_recursive_single_char() {
        // should be valid
        let inp = "/a/";
        let pool = parse_org(inp);

        pool.print_tree();
    }
}