org_rust_parser/element/
block.rs

1use std::collections::HashMap;
2
3use crate::node_pool::NodeID;
4use crate::parse::parse_element;
5use crate::types::{Cursor, MatchError, ParseOpts, Parseable, Parser, Result, process_attrs};
6use lazy_static::lazy_static;
7use regex::bytes::Regex;
8
9// regexes that search for various ending tokens on a line that only contains whitespace
10#[rustfmt::skip]
11lazy_static! {
12  static ref CENTER_RE  : Regex = Regex::new(r"(?mi)^[ \t]*#\+end_center[\t ]*$") .unwrap();
13  static ref QUOTE_RE   : Regex = Regex::new(r"(?mi)^[ \t]*#\+end_quote[\t ]*$")  .unwrap();
14  static ref COMMENT_RE : Regex = Regex::new(r"(?mi)^[ \t]*#\+end_comment[\t ]*$").unwrap();
15  static ref EXAMPLE_RE : Regex = Regex::new(r"(?mi)^[ \t]*#\+end_example[\t ]*$").unwrap();
16  static ref EXPORT_RE  : Regex = Regex::new(r"(?mi)^[ \t]*#\+end_export[\t ]*$") .unwrap();
17  static ref SRC_RE     : Regex = Regex::new(r"(?mi)^[ \t]*#\+end_src[\t ]*$")    .unwrap();
18  static ref VERSE_RE   : Regex = Regex::new(r"(?mi)^[ \t]*#\+end_verse[\t ]*$")  .unwrap();
19}
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum Block<'a> {
23    // Greater Blocks
24    Center {
25        parameters: HashMap<&'a str, &'a str>,
26        contents: Vec<NodeID>,
27    },
28    Quote {
29        parameters: HashMap<&'a str, &'a str>,
30        contents: Vec<NodeID>,
31    },
32    Special {
33        parameters: HashMap<&'a str, &'a str>,
34        contents: Vec<NodeID>,
35        name: &'a str,
36    },
37
38    // Lesser Blocks
39    Comment {
40        parameters: HashMap<&'a str, &'a str>,
41        contents: &'a str,
42    },
43    Example {
44        parameters: HashMap<&'a str, &'a str>,
45        contents: &'a str,
46    },
47    Export {
48        backend: Option<&'a str>,
49        parameters: HashMap<&'a str, &'a str>,
50        contents: &'a str,
51    },
52    Src {
53        language: Option<&'a str>,
54        parameters: HashMap<&'a str, &'a str>,
55        contents: &'a str,
56    },
57    Verse {
58        parameters: HashMap<&'a str, &'a str>,
59        contents: &'a str,
60    },
61}
62
63impl<'a> Parseable<'a> for Block<'a> {
64    fn parse(
65        parser: &mut Parser<'a>,
66        mut cursor: Cursor<'a>,
67        parent: Option<crate::node_pool::NodeID>,
68        parse_opts: ParseOpts,
69    ) -> Result<NodeID> {
70        let start = cursor.index;
71        cursor
72            .word("#+begin_")
73            .or_else(|_| cursor.word("#+BEGIN_"))?;
74
75        let block_name_match = cursor.fn_until(|chr: u8| chr.is_ascii_whitespace())?;
76
77        // if no progress was made looking for the block_type:
78        // i.e.: #+begin_\n
79        if cursor.index == block_name_match.end {
80            return Err(MatchError::InvalidLogic);
81        }
82        cursor.index = block_name_match.end;
83        // parse paramters
84        cursor.skip_ws();
85
86        let block_kind: BlockKind = block_name_match.obj.into();
87
88        let mut language: Option<&str> = None;
89        let mut backend: Option<&str> = None;
90        match block_kind {
91            // TODO: reduce duplication here
92            BlockKind::Src => {
93                let lang_match = cursor.fn_until(|chr| chr.is_ascii_whitespace())?;
94                let trimmed = lang_match.obj.trim();
95
96                if trimmed.is_empty() {
97                    language = None;
98                } else {
99                    language = Some(trimmed);
100                }
101                cursor.skip_ws();
102            }
103            BlockKind::Export => {
104                let backend_match = cursor.fn_until(|chr| chr.is_ascii_whitespace())?;
105                let trimmed = backend_match.obj.trim();
106
107                if trimmed.is_empty() {
108                    backend = None;
109                } else {
110                    backend = Some(trimmed);
111                }
112                cursor.skip_ws();
113            }
114            _ => (),
115        }
116        // TODO: src switches
117        let (mut cursor, parameters) = process_attrs(cursor)?;
118        // skip newline
119        cursor.next();
120
121        // have to predeclare these so that the allocated regex
122        // doesn't go out of scope and we can still pull a reference
123        // to it.
124        let alloc_reg;
125
126        // avoid an allocation for pre-known endings
127        let re = if let Some(block_end) = block_kind.to_end() {
128            block_end
129        } else {
130            alloc_reg = Regex::new(&format!(
131                r"(?mi)^[ \t]*#\+end_{}[\t ]*$",
132                block_name_match.obj
133            ))
134            .unwrap();
135            &alloc_reg
136        };
137
138        // Find ending cookie: #+end_{}
139        // lesser blocks: clamp a string between the beginning and end
140        // greater blocks: parse between the bounds
141        // let re = regex::bytes::Regex::new(needle).unwrap();
142        let ret = if let Some(val) = re.find(cursor.rest()) {
143            val
144        } else {
145            Err(MatchError::InvalidLogic)?
146        };
147
148        let loc = ret.start() + cursor.index;
149        let end = ret.end() + cursor.index;
150
151        // handle empty contents
152        // if cursor.index > loc {
153        //     cursor.index = loc;
154        // }
155
156        if block_kind.is_lesser() {
157            let contents = cursor.clamp_forwards(loc);
158            Ok(parser.alloc(
159                match block_kind {
160                    BlockKind::Center | BlockKind::Quote | BlockKind::Special(_) => unreachable!(),
161                    BlockKind::Comment => Block::Comment {
162                        parameters,
163                        contents,
164                    },
165                    BlockKind::Example => Block::Example {
166                        parameters,
167                        contents,
168                    },
169                    BlockKind::Export => Block::Export {
170                        backend,
171                        parameters,
172                        contents,
173                    },
174                    BlockKind::Src => Block::Src {
175                        language,
176                        parameters,
177                        contents,
178                    },
179                    BlockKind::Verse => Block::Verse {
180                        parameters,
181                        contents,
182                    },
183                },
184                start,
185                end,
186                parent,
187            ))
188        } else {
189            let mut contents: Vec<NodeID> = Vec::new();
190            let reserve_id = parser.pool.reserve_id();
191            // REVIEW: janky
192            let mut temp_cursor = cursor.cut_off(loc);
193            while let Ok(element_id) =
194                // use default parseopts since it wouldn't make sense for the contents
195                // of the block to be interpreted as a list, or be influenced from the outside
196                parse_element(parser, temp_cursor, Some(reserve_id), ParseOpts::default())
197            {
198                contents.push(element_id);
199                temp_cursor.index = parser.pool[element_id].end;
200            }
201
202            Ok(parser.alloc_with_id(
203                match block_kind {
204                    BlockKind::Center => Block::Center {
205                        parameters,
206                        contents,
207                    },
208                    BlockKind::Quote => Block::Quote {
209                        parameters,
210                        contents,
211                    },
212                    BlockKind::Special(name) => Block::Special {
213                        parameters,
214                        contents,
215                        name,
216                    },
217                    BlockKind::Comment
218                    | BlockKind::Example
219                    | BlockKind::Export
220                    | BlockKind::Src
221                    | BlockKind::Verse => unreachable!(),
222                },
223                start,
224                end,
225                parent,
226                reserve_id,
227            ))
228        }
229    }
230}
231
/// Kind of block named on a `#+begin_NAME` line, before its body is read.
#[derive(Debug, Clone, Copy)]
enum BlockKind<'a> {
    // Greater
    /// `center`
    Center,
    /// `quote`
    Quote,
    /// Any other name; holds the name as written in the source.
    Special(&'a str),

    // Lesser
    /// `comment`
    Comment,
    /// `example`
    Example,
    /// `export`
    Export,
    /// `src`
    Src,
    /// `verse`
    Verse,
}
246
247impl BlockKind<'_> {
248    pub fn is_lesser(&self) -> bool {
249        matches!(
250            self,
251            BlockKind::Comment
252                | BlockKind::Example
253                | BlockKind::Export
254                | BlockKind::Src
255                | BlockKind::Verse
256        )
257    }
258
259    #[rustfmt::skip]
260    fn to_end(self) -> Option<&'static Regex> {
261        match self {
262            BlockKind::Center  => Some(&CENTER_RE ) ,
263            BlockKind::Quote   => Some(&QUOTE_RE  ) ,
264            BlockKind::Comment => Some(&COMMENT_RE) ,
265            BlockKind::Example => Some(&EXAMPLE_RE) ,
266            BlockKind::Export  => Some(&EXPORT_RE ) ,
267            BlockKind::Src     => Some(&SRC_RE    ) ,
268            BlockKind::Verse   => Some(&VERSE_RE  ) ,
269            BlockKind::Special(_) => None,
270        }
271    }
272}
273
274impl<'a> From<&'a str> for BlockKind<'a> {
275    fn from(value: &'a str) -> Self {
276        match value.to_ascii_lowercase().as_str() {
277            "center" => Self::Center,
278            "quote" => Self::Quote,
279            "comment" => Self::Comment,
280            "example" => Self::Example,
281            "export" => Self::Export,
282            "verse" => Self::Verse,
283            "src" => Self::Src,
284            _ => Self::Special(value),
285        }
286    }
287}
288
289impl<'a> From<BlockKind<'a>> for &'a str {
290    fn from(value: BlockKind<'a>) -> Self {
291        match value {
292            BlockKind::Center => "center",
293            BlockKind::Quote => "quote",
294            BlockKind::Special(val) => val,
295            BlockKind::Comment => "comment",
296            BlockKind::Example => "example",
297            BlockKind::Export => "export",
298            BlockKind::Src => "src",
299            BlockKind::Verse => "verse",
300        }
301    }
302}
303
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use crate::element::Block;
    use crate::types::Expr;
    use crate::{expr_in_pool, parse_org};

    use pretty_assertions::assert_eq;

    /// An empty export block has no backend, no parameters and empty contents.
    #[test]
    fn test_basic_block() {
        let pool = parse_org("#+begin_export\n#+end_export\n");
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            block,
            &Block::Export {
                backend: None,
                parameters: HashMap::new(),
                contents: ""
            }
        );
    }

    /// An unknown block name produces a special block carrying that name.
    #[test]
    fn test_special_block() {
        let pool = parse_org("#+begin_rainbow\n#+end_rainbow\n");
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            block,
            &Block::Special {
                parameters: HashMap::new(),
                contents: Vec::new(),
                name: "rainbow"
            }
        );
    }

    /// The word after `#+begin_src` is reported as the language.
    #[test]
    fn test_src_block() {
        let pool = parse_org("#+begin_src python\n#+end_src\n");
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            block,
            &Block::Src {
                language: Some("python"),
                parameters: HashMap::new(),
                contents: ""
            }
        );
    }

    /// `:key` words on the header line become parameters with empty values.
    #[test]
    fn test_block_params() {
        let pool = parse_org("#+begin_example :gotta :love :examples\n#+end_example\n");
        let block = expr_in_pool!(pool, Block).unwrap();

        let expected_params = HashMap::from([("gotta", ""), ("love", ""), ("examples", "")]);
        assert_eq!(
            block,
            &Block::Example {
                parameters: expected_params,
                contents: ""
            }
        );
    }

    /// Plain words on the header line are not parameters; the body is kept raw.
    #[test]
    fn test_lesser_block_content() {
        let body = "smallexp\n";
        let input = format!("#+begin_example gotta love examples\n{}#+end_example\n", body);

        let pool = parse_org(input.as_str());
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            block,
            &Block::Example {
                parameters: HashMap::new(),
                contents: body
            }
        );
    }

    /// Markup inside a lesser block is left untouched across many lines.
    #[test]
    fn test_big_lesser_block_content() {
        let body = r"this is a larger example gotta love examples
to demonstrate that it works
string substring
big
one two three
/formatted text? no such thing!/
*abc*
";
        let input = format!("#+begin_example\n{}#+end_example\n", body);

        let pool = parse_org(input.as_str());
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            block,
            &Block::Example {
                parameters: HashMap::new(),
                contents: body
            }
        );
    }

    /// Smoke test: a quote block with mixed content parses and prints.
    #[test]
    fn test_big_greater_block_content() {
        let input = r"
#+begin_quote

/formatted text? such thing!/
*abc*

* headlines too
anything is possible

blank lines

#+keyword: one

#+begin_src rust
let nest = Some()

if let Some(nested) = nest {
    dbg!(meta);
}

#+end_src

** headline :tag:meow:
#+end_quote
";
        let pool = parse_org(input);
        pool.print_tree();
    }

    /// Text before and after the block is not part of its contents.
    #[test]
    fn block_ending_proper() {
        let body = "\nhere is some text\n";
        let input = format!(
            "\n\ntext before\n#+begin_src python\n{}#+end_src\n\nhere is after\n\n",
            body
        );

        let pool = parse_org(input.as_str());
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            block,
            &Block::Src {
                language: Some("python"),
                parameters: HashMap::new(),
                contents: body
            }
        );
    }

    /// An indented lesser block keeps the indentation of its body.
    #[test]
    fn lesser_block_indented() {
        let body = "             we are eating so good?\n";
        let input = format!(
            "\n             #+begin_example\n{}             #+end_example\n",
            body
        );

        let pool = parse_org(input.as_str());
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            block,
            &Block::Example {
                parameters: HashMap::new(),
                contents: body
            }
        );
    }

    /// Smoke test: an indented special block parses and prints.
    #[test]
    fn greater_block_indented() {
        let input = r"
             #+begin_swag
             we are eating so good?
             #+end_swag
";

        let pool = parse_org(input);
        pool.print_tree();
    }

    /// Smoke test: a quote block inside a list item parses and prints.
    #[test]
    fn gblock_plus_list() {
        let input = r"
- a
   #+begin_quote
hiiiiiiiiiiiiiiiiiii
   #+end_quote
-
";

        let pool = parse_org(input);
        pool.print_tree();
    }

    /// A src block inside a list item keeps blank lines and has no language.
    #[test]
    fn lblock_plus_list() {
        let body = "\n\nhiiiiiiiiiiiiiiiiiii\n\ntext\n";
        let input = format!("\n-\n   #+begin_src\n{}   #+end_src\n\n-\n", body);

        let pool = parse_org(input.as_str());
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            block,
            &Block::Src {
                language: None,
                parameters: HashMap::new(),
                contents: body
            }
        );
    }

    /// Upper-case begin and end keywords are accepted.
    #[test]
    fn caps() {
        let pool = parse_org("\n#+BEGIN_VERSE\ntext\n#+END_VERSE\n");
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            &Block::Verse {
                parameters: HashMap::new(),
                contents: "text\n"
            },
            block
        );
    }

    /// An indented upper-case closing line still ends the block.
    #[test]
    fn caps_space() {
        let body = "                                                text\n";
        let input = format!("\n#+BEGIN_COMMENT\n{}                #+END_COMMENT\n", body);

        let pool = parse_org(input.as_str());
        let block = expr_in_pool!(pool, Block).unwrap();

        assert_eq!(
            &Block::Comment {
                parameters: HashMap::new(),
                contents: body
            },
            block
        );
    }
}