org_rust_parser/
lib.rs

1// #![allow(dead_code)]
2#![allow(unused_variables)]
3
4pub mod element;
5pub mod object;
6
7pub(crate) mod node_pool;
8pub(crate) mod types;
9pub(crate) mod utils;
10
11mod parse;
12
13pub use node_pool::{NodeID, NodePool};
14pub use types::{Expr, Node, Parser};
15pub use utils::Match;
16
17use std::collections::HashMap;
18
19use parse::{parse_element, parse_object};
20use types::{Cursor, NodeCache, ParseOpts};
21
/// Single-byte ASCII constants shared by the parsing code.
///
/// Every constant is the `u8` value of the named ASCII character, so it can be
/// compared directly against bytes of the input.
pub(crate) mod constants {
    // Characters commonly used as inline markup markers.
    pub const SLASH: u8 = b'/';
    pub const STAR: u8 = b'*';
    pub const PLUS: u8 = b'+';
    pub const UNDERSCORE: u8 = b'_';
    pub const TILDE: u8 = b'~';
    pub const EQUAL: u8 = b'=';
    pub const CARET: u8 = b'^';

    // Paired delimiters.
    pub const LBRACK: u8 = b'[';
    pub const RBRACK: u8 = b']';
    pub const LBRACE: u8 = b'{';
    pub const RBRACE: u8 = b'}';
    pub const LANGLE: u8 = b'<';
    pub const RANGLE: u8 = b'>';
    pub const LPAREN: u8 = b'(';
    pub const RPAREN: u8 = b')';

    // Remaining punctuation.
    pub const POUND: u8 = b'#';
    pub const HYPHEN: u8 = b'-';
    pub const COLON: u8 = b':';
    pub const VBAR: u8 = b'|';
    pub const BACKSLASH: u8 = b'\\';
    pub const DOLLAR: u8 = b'$';
    pub const PERIOD: u8 = b'.';
    pub const COMMA: u8 = b',';

    // Whitespace.
    pub const SPACE: u8 = b' ';
    pub const NEWLINE: u8 = b'\n';
}
50
51/// The main entry point to the parser.
52///
53/// Repeatedly parses elements until EOF, then returns a [`Parser`].
54pub fn parse_org(input: &str) -> Parser<'_> {
55    let mut cursor = Cursor::new(input.as_bytes());
56    let parse_opts = ParseOpts::default();
57    let mut pool = NodePool::new();
58    let parent = pool.reserve_id();
59    let mut content_vec: Vec<NodeID> = Vec::new();
60
61    let cache = NodeCache::new();
62    let mut parser = Parser {
63        pool,
64        cache,
65        targets: HashMap::new(),
66        macros: HashMap::new(),
67        keywords: HashMap::new(),
68        target_occurences: HashMap::new(),
69        footnotes: HashMap::new(),
70        source: input,
71    };
72    // main loop
73    while let Ok(id) = parse_element(&mut parser, cursor, Some(parent), parse_opts) {
74        content_vec.push(id);
75        cursor.move_to(parser.pool[id].end);
76    }
77    parser.alloc_with_id(Expr::Root(content_vec), 0, cursor.index, None, parent);
78
79    parser
80}
81
82/// An alternative entry point to the parser for parsing macros.
83///
84/// Unlike [`parse_org`], this function parses objects, not elements.
85pub fn parse_macro_call<'a>(input: &'a str) -> Parser<'a> {
86    let mut cursor = Cursor::new(input.as_bytes());
87    let parse_opts = ParseOpts::default();
88    let mut pool = NodePool::new();
89    let parent = pool.reserve_id();
90    let mut content_vec: Vec<NodeID> = Vec::new();
91
92    // FIXME: pass in keywords + macros
93    let mut parser = Parser {
94        pool,
95        cache: NodeCache::new(),
96        targets: HashMap::new(),
97        macros: HashMap::new(),
98        keywords: HashMap::new(),
99        target_occurences: HashMap::new(),
100        footnotes: HashMap::new(),
101        source: input,
102    };
103    while let Ok(id) = parse_object(&mut parser, cursor, Some(parent), parse_opts) {
104        content_vec.push(id);
105        cursor.move_to(parser.pool[id].end);
106    }
107    parser.alloc_with_id(Expr::Root(content_vec), 0, cursor.index, None, parent);
108
109    parser
110}
111
#[cfg(test)]
mod tests {
    //! Smoke tests for [`parse_org`].
    //!
    //! None of these tests assert on the parse result; each one passes as long
    //! as parsing (and printing the result) does not panic.

    use super::*;

    /// A single newline-terminated line.
    #[test]
    fn test_basic_paragraph() {
        let inp = "hello_world\n";

        dbg!(parse_org(inp));
    }

    /// Tests whether we handle unexpected eof
    #[test]
    fn test_basic_paragraph_no_nl() {
        let inp = "hello_world";

        dbg!(parse_org(inp));
    }

    /// Two consecutive lines with no blank line between them.
    #[test]
    fn test_basic_paragraph_newline() {
        let inp = "hello_world\nsame_paragraph\n";

        dbg!(parse_org(inp));
    }

    /// `/.../` markup in the middle of a line.
    #[test]
    fn test_basic_markup() {
        parse_org("hello /italic/ more text after\n").print_tree();
    }

    /// `/.../` markup whose contents contain a newline.
    #[test]
    fn test_newline_in_italic_markup() {
        parse_org("hello /italic \n newline/ more text after\n").print_tree();
    }

    /// `=...=` markup whose contents contain a newline.
    #[test]
    fn test_newline_in_verbatim() {
        let inp = "hello =italic \n newline= more text after\n";

        // dbg!(parse_org(inp));
        let parsed = parse_org(inp);
        println!("{:?}", parsed);
    }

    /// A larger document mixing many kinds of syntax in one input.
    #[test]
    fn lots() {
        let pool = parse_org(
            r#"
#+macro: greet Hello $1, nice typing... $1.
* Basic Heading

{{{greet(user)}}}

** Child Heading

- https://plain_links.com.
  - <mailto:okiedokie@cool.com>
    - src_python{(technically) inline_src}
- [[Child Heading]]
  - \aleph \leftarrow entities

#+begin_export
<style type="text/css" media="screen">
table, th, td {
  border: 1px solid;
}
</style>
#+end_export

|tables!|[[targets][links to output target]]|styled,, manually :sweat_smile:
|no|default css (yet)|
|||||||||table

1. +does+
2. *it*
3. /all/
4. ~code~
5. =code, again..=
6. /so _nested_, *t^o_o*./

emojis :flushed: :tada: :sunglasses:

\begin{align}
x &+ 4\\
abc &+ 10\\
\end{align}
output MathML, little janky atm (might switch to katex..?)

Target here: <<targets>>\\


# doesn't look the best, imo
-----

#+begin_src rust
nothing styled for source blocks yet, too.
#+end_src

"#,
        );
        pool.print_tree();
        dbg!(pool);
    }

    /// A short list followed by a blank line and a line of hyphens.
    #[test]
    fn correctness_cache() {
        // dbg!(&pool);
        parse_org(
            r"
- one
- two

--------------
",
        )
        .print_tree();
    }

    /// Input that contains a multi-byte UTF-8 character.
    #[test]
    fn basic_unicode() {
        parse_org(
            r"é
",
        )
        .print_tree();
    }
}