org_rust_parser/element/
heading.rs

1use std::rc::Rc;
2
3use crate::constants::{COLON, NEWLINE, RBRACK, SPACE, STAR};
4use crate::node_pool::NodeID;
5use crate::parse::{parse_element, parse_object};
6use crate::types::{Cursor, Expr, MatchError, ParseOpts, Parseable, Parser, Result};
7use crate::utils::{bytes_to_str, Match};
8
9use super::{parse_property, PropertyDrawer};
10
/// Todo keywords recognised at the start of a headline.
///
/// `Heading::parse_keyword` tries these in order and only accepts a keyword
/// that is immediately followed by ASCII whitespace.
const ORG_TODO_KEYWORDS: [&str; 2] = ["TODO", "DONE"];
12
/// A parsed headline together with the contents of its section.
///
/// The headline line is read in this order:
/// STARS KEYWORD PRIORITY TITLE TAGS
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Heading<'a> {
    /// Depth of the headline, derived from the number of leading stars.
    pub heading_level: HeadingLevel,
    // Org-Todo type stuff
    /// Todo keyword (an entry of `ORG_TODO_KEYWORDS`), if the headline has one.
    pub keyword: Option<&'a str>,
    /// Priority cookie such as `[#A]` or `[#12]`, if the headline has one.
    pub priority: Option<Priority>,
    // plain text of title + parsed nodeIDs
    // store both to accommodate targets
    /// Raw title text paired with the ids of the objects parsed out of it.
    /// `None` when the title area is empty or only whitespace.
    pub title: Option<(&'a str, Vec<NodeID>)>,
    /// Tags of this headline: its own raw tags plus `Tag::Loc` references
    /// added while an enclosing headline was being parsed.
    pub tags: Option<Vec<Tag<'a>>>,
    /// Property drawer parsed directly after the headline line, if any.
    pub properties: Option<PropertyDrawer<'a>>,
    /// Ids of the elements parsed as part of this headline's section,
    /// including deeper headlines. `None` when there are none.
    pub children: Option<Vec<NodeID>>,
}
27
/// Priority cookie of a headline (`[#A]`, `[#1]`, `[#12]`, ...).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Priority {
    /// The cookie `[#A]`.
    A,
    /// The cookie `[#B]`.
    B,
    /// The cookie `[#C]`.
    C,
    /// Value of a numeric cookie such as `[#1]` or `[#12]`.
    Num(u8),
}
35
/// Headline Tag
///
/// ```example
/// * head :tag:
/// ** child :child:
/// ```
///
/// A headline's own tags are stored as [`Tag::Raw`]. While a headline is
/// being parsed, every deeper headline found among its section elements
/// additionally receives a [`Tag::Loc`] pointing back at it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Tag<'a> {
    /// Tag unique to the individual headline.
    Raw(&'a str),
    /// NodeID referring to the parent headline.
    Loc(NodeID),
}
49
/// Enum of possible headline levels
///
/// Variants are declared from shallowest to deepest, so the derived
/// ordering satisfies `One < Two < ... < Six`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum HeadingLevel {
    /// One leading star.
    One,
    /// Two leading stars.
    Two,
    /// Three leading stars.
    Three,
    /// Four leading stars.
    Four,
    /// Five leading stars.
    Five,
    /// Six leading stars.
    Six,
}
60
61// Implemented not via `TryFrom` so that `MatchError` can be private
62// while keeping the struct Public
63fn try_heading_levelfrom(value: usize) -> Result<HeadingLevel> {
64    match value {
65        1 => Ok(HeadingLevel::One),
66        2 => Ok(HeadingLevel::Two),
67        3 => Ok(HeadingLevel::Three),
68        4 => Ok(HeadingLevel::Four),
69        5 => Ok(HeadingLevel::Five),
70        6 => Ok(HeadingLevel::Six),
71        _ => Err(MatchError::InvalidLogic),
72    }
73}
74
75impl From<HeadingLevel> for u8 {
76    fn from(value: HeadingLevel) -> Self {
77        match value {
78            HeadingLevel::One => 1,
79            HeadingLevel::Two => 2,
80            HeadingLevel::Three => 3,
81            HeadingLevel::Four => 4,
82            HeadingLevel::Five => 5,
83            HeadingLevel::Six => 6,
84        }
85    }
86}
87impl<'a> Parseable<'a> for Heading<'a> {
88    fn parse(
89        parser: &mut Parser<'a>,
90        mut cursor: Cursor<'a>,
91        parent: Option<NodeID>,
92        parse_opts: ParseOpts,
93    ) -> Result<NodeID> {
94        let start = cursor.index;
95
96        let stars = Heading::parse_stars(cursor)?;
97        let heading_level = stars.obj;
98        cursor.move_to(stars.end);
99
100        // guaranteed to allocate since this is a valid headline. Setup the id
101        let reserved_id = parser.pool.reserve_id();
102
103        let keyword: Option<&str> = if let Ok(keyword_match) = Heading::parse_keyword(cursor) {
104            cursor.move_to(keyword_match.end);
105            Some(keyword_match.obj)
106        } else {
107            None
108        };
109
110        let priority: Option<Priority> = if let Ok(prio_match) = Heading::parse_priority(cursor) {
111            cursor.move_to(prio_match.end);
112            Some(prio_match.obj)
113        } else {
114            None
115        };
116
117        let tag_match = Heading::parse_tag(cursor);
118        // if the tags are valid:
119        // tag_match.start: space
120        // tag_match.end: past newline
121        //
122        // otherwise:
123        //
124        // tag_match.start: newline
125        // tag_match.end: past newline
126        let tags = tag_match.obj;
127
128        // use separate idx and shorten the bottom and top of the byte_arr
129        // to trim
130
131        // try to trim whitespace off the beginning and end of the area
132        // we're searching
133
134        let (title, target) = if let Ok((title, target)) =
135            Heading::parse_title(parser, cursor, tag_match.start, reserved_id, parse_opts)
136        {
137            (title, target)
138        } else {
139            (None, None)
140        };
141
142        // jump past the newline
143        cursor.move_to(tag_match.end);
144
145        // Handle subelements
146
147        let properties = if let Ok(ret) = parse_property(cursor) {
148            cursor.index = ret.end;
149            Some(ret.obj)
150        } else {
151            None
152        };
153
154        let mut section_vec: Vec<NodeID> = Vec::new();
155
156        while let Ok(element_id) = parse_element(parser, cursor, Some(reserved_id), parse_opts) {
157            if let Expr::Heading(ref mut heading) = parser.pool[element_id].obj {
158                if u8::from(heading_level) < u8::from(heading.heading_level) {
159                    if let Some(tag_vec) = &mut heading.tags {
160                        tag_vec.push(Tag::Loc(reserved_id));
161                    } else {
162                        heading.tags = Some(vec![Tag::Loc(reserved_id)]);
163                    }
164                } else {
165                    break;
166                }
167            }
168
169            section_vec.push(element_id);
170            cursor.move_to(parser.pool[element_id].end);
171        }
172
173        let children = if section_vec.is_empty() {
174            None
175        } else {
176            Some(section_vec)
177        };
178
179        let ret_id = parser.alloc_with_id(
180            Self {
181                heading_level,
182                keyword,
183                priority,
184                title,
185                tags,
186                children,
187                properties,
188            },
189            start,
190            cursor.index,
191            parent,
192            reserved_id,
193        );
194        parser.pool[ret_id].id_target = target;
195        Ok(ret_id)
196    }
197}
198
199impl<'a> Heading<'a> {
200    fn parse_stars(cursor: Cursor) -> Result<Match<HeadingLevel>> {
201        let ret = cursor.fn_while(|chr: u8| chr == STAR)?;
202
203        if cursor[ret.end] != SPACE {
204            Err(MatchError::InvalidLogic)
205        } else {
206            let heading_level: HeadingLevel = try_heading_levelfrom(ret.end - cursor.index)?;
207            Ok(Match {
208                start: cursor.index,
209                end: ret.end,
210                obj: heading_level,
211            })
212            // Ok(ret.end);
213        }
214    }
215
216    fn parse_keyword(mut cursor: Cursor) -> Result<Match<&str>> {
217        let start = cursor.index;
218        cursor.skip_ws();
219
220        for (i, val) in ORG_TODO_KEYWORDS.iter().enumerate() {
221            // TODO: read up to a whitespace and determine if it's in phf set for keywords
222            // this is currently O(n), we can make it O(1)
223            if cursor.word(val).is_ok() {
224                // keep going in if not whitespace
225                // because a keyword might be a subset of another,,,
226                if cursor.try_curr()?.is_ascii_whitespace() {
227                    return Ok(Match {
228                        start,
229                        end: cursor.index, // don't move 1 ahead, in case it's a newline
230                        obj: val,
231                    });
232                } else {
233                    cursor.index -= val.len();
234                }
235            }
236        }
237
238        Err(MatchError::InvalidLogic)
239    }
240
241    // Recognizes the following patterns:
242    // [#A]
243    // [#1]
244    // [#12]
245    // TODO: we don't respect the 65 thing for numbers
246    fn parse_priority(mut cursor: Cursor) -> Result<Match<Priority>> {
247        let start = cursor.index;
248        cursor.skip_ws();
249        // TODO: check if this is true
250        // FIXME breaks in * [#A]EOF
251
252        let end_idx;
253        let ret_prio: Priority;
254        cursor.word("[#")?;
255
256        // #[A] OR #[1]
257        if cursor.try_curr()?.is_ascii_alphanumeric() && cursor.peek(1)? == RBRACK {
258            end_idx = cursor.index + 2;
259            ret_prio = match cursor.curr() {
260                b'A' => Priority::A,
261                b'B' => Priority::B,
262                b'C' => Priority::C,
263                num => Priority::Num(num - 48),
264            };
265        }
266        // #[64]
267        else if cursor.curr().is_ascii_digit()
268            && cursor.peek(1)?.is_ascii_digit()
269            && cursor.peek(2)? == RBRACK
270        {
271            end_idx = cursor.index + 3;
272            // convert digits from their ascii rep, then add.
273            // NOTE: all two digit numbers are valid u8, cannot overflow
274            ret_prio = Priority::Num(10 * (cursor.curr() - 48) + (cursor.peek(1)? - 48));
275        } else {
276            return Err(MatchError::InvalidLogic);
277        }
278
279        Ok(Match {
280            start,
281            end: end_idx,
282            obj: ret_prio,
283        })
284    }
285
286    fn parse_tag(mut cursor: Cursor) -> Match<Option<Vec<Tag>>> {
287        // we parse tags backwards
288        let start = cursor.index;
289        cursor.adv_till_byte(NEWLINE);
290        let nl_loc = cursor.index;
291        cursor.prev();
292
293        while cursor.curr() == SPACE {
294            cursor.prev();
295        }
296
297        if cursor.curr() == COLON {
298            let mut clamp_ind = cursor.index;
299            cursor.prev();
300            let mut tag_vec: Vec<Tag> = Vec::new();
301
302            while cursor.index >= start {
303                if cursor.curr().is_ascii_alphanumeric()
304                    | matches!(cursor.curr(), b'_' | b'@' | b'#' | b'%')
305                {
306                    cursor.prev();
307                } else if cursor.curr() == COLON && clamp_ind.abs_diff(cursor.index) > 1 {
308                    let new_str = cursor.clamp(cursor.index + 1, clamp_ind);
309                    tag_vec.push(Tag::Raw(new_str));
310                    clamp_ind = cursor.index;
311                    if cursor[cursor.index - 1] == SPACE {
312                        // end the search
313                        return Match {
314                            start: cursor.index - 1,
315                            end: nl_loc + 1,
316                            obj: Some(tag_vec),
317                        };
318                    } else {
319                        // otherwise, keep going
320                        cursor.prev();
321                    }
322                } else {
323                    // invalid input: reset temp_ind back to end
324                    return Match {
325                        start: nl_loc,
326                        end: nl_loc + 1,
327                        obj: None,
328                    };
329                }
330            }
331        }
332
333        Match {
334            start: nl_loc,
335            end: nl_loc + 1,
336            obj: None,
337        }
338        // we reached the start element, without hitting a space. no tags
339    }
340
341    fn parse_title(
342        parser: &mut Parser<'a>,
343        cursor: Cursor<'a>,
344        mut title_end: usize,
345        reserved_id: NodeID,
346        parse_opts: ParseOpts,
347    ) -> Result<(Option<(&'a str, Vec<NodeID>)>, Option<Rc<str>>)> {
348        while let Some(item) = cursor.get(title_end).copied() {
349            if item == SPACE && title_end > cursor.index {
350                title_end -= 1;
351            } else {
352                break;
353            }
354        }
355        // alternative impl that does not accept titles that experience EOF, keeping here temporarily for posterity
356        // while cursor.get(title_end).ok_or(MatchError::EofError).copied()? == SPACE
357        //     && title_end > cursor.index
358        // {
359        //     title_end -= 1;
360        // }
361
362        let top_off = (title_end + 1).min(cursor.byte_arr.len());
363        let mut temp_cursor = cursor.cut_off(top_off);
364
365        // FIXME: currently repeating work trimming the beginning at skip_ws and with trim_start
366        if bytes_to_str(temp_cursor.rest()).trim_start().is_empty() {
367            Ok((None, None))
368        } else {
369            let mut title_vec: Vec<NodeID> = Vec::new();
370
371            temp_cursor.skip_ws();
372            let title_start = temp_cursor.index;
373            while let Ok(title_id) =
374                parse_object(parser, temp_cursor, Some(reserved_id), parse_opts)
375            {
376                title_vec.push(title_id);
377                temp_cursor.move_to(parser.pool[title_id].end);
378            }
379
380            let title_entry = cursor.clamp(title_start, top_off);
381            let target = Some(parser.generate_target(title_entry));
382
383            Ok((Some((title_entry, title_vec)), target))
384        }
385    }
386}
387
// Tests for headline parsing. Each one drives the public `parse_org` entry
// point and inspects the heading node(s) it produced.
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use crate::element::{HeadingLevel, PropertyDrawer, Tag};
    use crate::node_pool::make_node_id;
    use crate::types::Expr;
    use crate::{expr_in_pool, parse_org};
    use pretty_assertions::assert_eq;

    use super::Heading;

    /// Parses `input` and returns a clone of the first heading found while
    /// iterating the node pool. Panics when the input produced no heading.
    fn get_head<'a>(input: &'a str) -> Heading<'a> {
        parse_org(input)
            .pool
            .iter()
            .find_map(|x| {
                if let Expr::Heading(heading) = &x.obj {
                    Some(heading)
                } else {
                    None
                }
            })
            .cloned()
            .unwrap()
    }
    // A lone star followed by a space is a level-one headline with nothing else.
    #[test]
    fn basic_headline() {
        let input = "* \n";

        let head = get_head(input);
        assert_eq!(
            head,
            Heading {
                heading_level: crate::element::HeadingLevel::One,
                keyword: None,
                priority: None,
                title: None,
                tags: None,
                properties: None,
                children: None,
            }
        )
    }

    // The number of stars determines the level.
    #[test]
    fn headline_stars() {
        let input = "****  \n";

        let head = get_head(input);
        assert_eq!(
            head,
            Heading {
                heading_level: crate::element::HeadingLevel::Four,
                keyword: None,
                priority: None,
                title: None,
                tags: None,
                properties: None,
                children: None,
            }
        )
    }

    #[test]
    #[should_panic]
    fn headline_too_many_stars() {
        // panics because we'd unwrap on the case of no headings
        let input = "*********  \n";

        let head = get_head(input);
    }

    // Leading whitespace is dropped from the title; without tags the title
    // keeps its trailing spaces and the newline.
    #[test]
    fn headline_title() {
        let inp = "*         title                                                \n";
        let item = get_head(inp);
        assert_eq!(
            item,
            Heading {
                heading_level: HeadingLevel::One,
                keyword: None,
                priority: None,
                title: Some((
                    "title                                                \n",
                    vec![make_node_id(2)]
                )),
                tags: None,
                properties: None,
                children: None
            }
        );
    }

    // A keyword followed by whitespace is recognised even with no title.
    #[test]
    fn headline_keyword() {
        let input = "* TODO \n";

        let head = get_head(input);
        assert_eq!(
            head,
            Heading {
                heading_level: crate::element::HeadingLevel::One,
                keyword: Some("TODO"),
                priority: None,
                title: None,
                tags: None,
                properties: None,
                children: None,
            }
        )
    }

    // A priority cookie on its own.
    #[test]
    fn headline_prio() {
        let input = "* [#A] \n";

        let head = get_head(input);
        assert_eq!(
            head,
            Heading {
                heading_level: crate::element::HeadingLevel::One,
                keyword: None,
                priority: Some(crate::element::Priority::A),
                title: None,
                tags: None,
                properties: None,
                children: None,
            }
        )
    }

    // With valid tags the title is trimmed up to the space before the tags.
    #[test]
    fn headline_tag_one() {
        let inp = "* cat :tagone:\n";
        let head = get_head(inp);

        assert_eq!(
            head,
            Heading {
                heading_level: crate::element::HeadingLevel::One,
                keyword: None,
                priority: None,
                title: Some(("cat", vec![make_node_id(2)])),
                tags: Some(vec![Tag::Raw("tagone")]),
                properties: None,
                children: None,
            }
        );
    }

    // Tags are collected right-to-left, so the last tag comes first.
    #[test]
    fn headline_tag_two() {
        let inp = "* test :tagone:tagtwo:\n";
        let head = get_head(inp);

        assert_eq!(
            head,
            Heading {
                heading_level: crate::element::HeadingLevel::One,
                keyword: None,
                priority: None,
                title: Some(("test", vec![make_node_id(2)])),
                tags: Some(vec![Tag::Raw("tagtwo"), Tag::Raw("tagone")]),
                properties: None,
                children: None,
            }
        );
    }

    // No space before the first colon: the whole tail stays in the title.
    #[test]
    fn headline_tag_bad_one() {
        let inp = "* abc one:tagone:tagtwo:\n";

        let parsed = parse_org(inp);
        let head = expr_in_pool!(parsed, Heading).unwrap();
        assert_eq!(head.title.as_ref().unwrap().0, "abc one:tagone:tagtwo:\n");
        assert_eq!(head.tags.as_ref(), None);
    }

    // An empty tag (`::`) invalidates the whole tag group.
    #[test]
    fn headline_tag_bad_two() {
        let inp = "* abc :tagone::\n";

        let parsed = parse_org(inp);
        let head = expr_in_pool!(parsed, Heading).unwrap();
        assert_eq!(head.title.as_ref().unwrap().0, "abc :tagone::\n");
        assert_eq!(head.tags.as_ref(), None);
    }

    // Keyword and priority together, in that order.
    #[test]
    fn headline_prio_keyword() {
        let input = "* TODO [#A] \n";

        let head = get_head(input);
        assert_eq!(
            head,
            Heading {
                heading_level: crate::element::HeadingLevel::One,
                keyword: Some("TODO"),
                priority: Some(crate::element::Priority::A),
                title: None,
                tags: None,
                properties: None,
                children: None,
            }
        )
    }

    // Smoke test: only checks that parsing completes; the result is dumped
    // for manual inspection.
    #[test]
    fn headline_prio_keyword_title() {
        let inp = "* TODO [#A] SWAG \n";

        dbg!(parse_org(inp));
    }

    // Smoke test: title containing markup, including an unbalanced `/four*`.
    #[test]
    fn headline_prio_keyword_decorated_title() {
        let inp = "* TODO [#A] *one* two /three/ /four* \n";

        dbg!(parse_org(inp));
    }

    // Smoke test: nested headlines, tags, priorities and paragraphs together.
    // Prints the resulting tree; nothing is asserted.
    #[test]
    fn headline_everything() {
        let inp = r"* DONE [#0] *one* two /three/ /four*       :one:two:three:four:
more content here this is a pargraph
** [#1] descendant headline :five:
*** [#2] inherit the tags
** [#3] different level
subcontent
this

is a different paragraph
id) =
more subcontent

* [#4] separate andy
";

        let pool = parse_org(inp);
        pool.print_tree();
    }

    // A property drawer directly under the headline is attached to it, and a
    // `name+` entry is appended to the existing value, separated by a space.
    #[test]
    fn properties_check() {
        let input = r"
* a
:properties:
:name: val
:end:

";

        let head = get_head(input);
        let got_prop = head.properties.as_ref().unwrap();
        assert_eq!(
            got_prop,
            &PropertyDrawer::from([("name", Cow::from("val"))])
        );

        let input = r"
* a
:properties:
:name: val
:name+: val again
:end:

";
        let head = get_head(input);
        let got_prop = head.properties.as_ref().unwrap();
        assert_eq!(
            got_prop,
            &PropertyDrawer::from([("name", Cow::from("val val again"))])
        );
    }

    // Smoke test: a colon group glued to the title text, with no trailing
    // newline at the end of the input.
    #[test]
    fn tag_parse() {
        let input = r"
* q ac:qbc:
qqqqq

aaaa";

        let pool = parse_org(input);
        pool.print_tree();
    }

    // Input ends right after the space that follows the stars.
    #[test]
    fn only_stars() {
        let input = r"*** ";
        let p = parse_org(input);
        let item = expr_in_pool!(p, Heading).unwrap();

        assert_eq!(item.heading_level, HeadingLevel::Three);
    }

    // Input ends right after the title, with no newline.
    #[test]
    fn only_stars_and_title() {
        let input = "*** g";
        let p = parse_org(input);
        let item = expr_in_pool!(p, Heading).unwrap();

        assert_eq!(
            item,
            &Heading {
                heading_level: HeadingLevel::Three,
                keyword: None,
                priority: None,
                title: Some(("g", vec![make_node_id(2)])),
                tags: None,
                properties: None,
                children: None
            }
        );
    }
}
703        );
704    }
705}