org_rust_parser/element/
keyword.rs

1use crate::constants::{DOLLAR, HYPHEN, NEWLINE, UNDERSCORE};
2use crate::node_pool::NodeID;
3use crate::parse::parse_element;
4use crate::types::{Cursor, Expr, MatchError, ParseOpts, Parseable, Parser, Result, process_attrs};
5use crate::utils::{Match, bytes_to_str};
6
7use super::Paragraph;
8
/// A plain `#+key: value` keyword line.
///
/// Both fields borrow directly from the source text that was parsed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Keyword<'a> {
    /// The text between `#+` and the first `:` (or whitespace), as written.
    pub key: &'a str,
    /// The remainder of the line after the `:`, with surrounding ASCII
    /// whitespace trimmed.
    pub val: &'a str,
}
14
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub enum Affiliated<'a> {
17    Name(Option<NodeID>),
18    Caption(NodeID), // inside is a paragraph
19    Attr {
20        child_id: Option<NodeID>,
21        backend: &'a str,
22        val: &'a str,
23    },
24}
25
26impl<'a> Parseable<'a> for Keyword<'a> {
27    fn parse(
28        parser: &mut Parser<'a>,
29        mut cursor: Cursor<'a>,
30        parent: Option<NodeID>,
31        parse_opts: ParseOpts,
32    ) -> Result<NodeID> {
33        let start = cursor.index;
34        cursor.word("#+")?;
35
36        // ,#+attr_html: :class one :class one two three four :attr :attr1
37        if cursor.word("attr_").is_ok() | cursor.word("ATTR_").is_ok() {
38            let backend = cursor.fn_until(|chr: u8| chr == b':' || chr.is_ascii_whitespace())?;
39            cursor.index = backend.end;
40            cursor.word(":")?;
41
42            // val is in the form
43            // :key val :key val :key val
44            let val_start_ind = cursor.index;
45            let (mut cursor, new_attrs) = process_attrs(cursor)?;
46            let val = cursor.clamp_backwards(val_start_ind);
47            // skip past newline
48            cursor.next();
49            let end = cursor.index;
50
51            let lowercase_backend = backend.obj.to_ascii_lowercase();
52            let child_id = loop {
53                if let Ok(child_id) = parse_element(parser, cursor, parent, parse_opts) {
54                    let node = &mut parser.pool[child_id];
55                    if let Expr::Affiliated(aff) = &node.obj {
56                        // skip affiliated objects
57                        cursor.index = node.end;
58                    } else {
59                        node.attrs
60                            .entry(lowercase_backend)
61                            .and_modify(|attr_map| {
62                                for (key, item) in &new_attrs {
63                                    attr_map.insert(key, item);
64                                }
65                            })
66                            .or_insert(new_attrs);
67                        break Some(child_id);
68                    }
69                } else {
70                    break None;
71                };
72            };
73
74            return Ok(parser.alloc(
75                Affiliated::Attr {
76                    child_id,
77                    backend: backend.obj,
78                    val: val.trim(),
79                },
80                start,
81                end,
82                parent,
83            ));
84        }
85        let key_word = cursor.fn_until(|chr: u8| chr == b':' || chr.is_ascii_whitespace())?;
86        // TODO warning
87        // not valid: #+: ...
88        if key_word.len() == 0 {
89            Err(MatchError::InvalidLogic)?
90        }
91        cursor.index = key_word.end;
92        cursor.word(":")?;
93
94        // keywords are pure ascii so use the cheaper option
95        match key_word.obj.to_ascii_lowercase().as_str() {
96            "macro" => {
97                if let Ok(mac) = MacroDef::parse(cursor) {
98                    // HACK: we're duplicating the mac object
99                    let nam = mac.obj.name;
100                    let id = parser.pool.alloc(mac.obj.clone(), start, mac.end, parent);
101                    parser.macros.insert(nam, mac.obj);
102                    return Ok(id);
103                }
104            }
105            "name" => {
106                let prev = cursor.index;
107                cursor.adv_till_byte(NEWLINE);
108                // not mentioned in the spec, but org-element trims
109                let val = bytes_to_str(cursor.byte_arr[prev..cursor.index].trim_ascii());
110
111                cursor.next();
112                let end_index = cursor.index;
113
114                let child_id = loop {
115                    if let Ok(child_id) = parse_element(parser, cursor, parent, parse_opts) {
116                        let node = &mut parser.pool[child_id];
117                        if let Expr::Affiliated(aff) = &node.obj {
118                            // skip affiliated objects
119                            cursor.index = node.end;
120                        } else {
121                            parser.pool[child_id].id_target = Some(parser.generate_target(val));
122                            break Some(child_id);
123                        }
124                    } else {
125                        break None;
126                    };
127                };
128                let ret_id = parser.alloc(Affiliated::Name(child_id), start, end_index, parent);
129
130                return Ok(ret_id);
131            }
132            "caption" => {
133                let val = cursor.fn_until(|chr: u8| chr == b'\n')?;
134                let caption_id = parser.pool.reserve_id();
135                let temp_cursor = cursor.cut_off(val.end);
136                let ret = Paragraph::parse(parser, temp_cursor, Some(caption_id), parse_opts)?;
137                parser.alloc_with_id(
138                    Affiliated::Caption(ret),
139                    start,
140                    val.end + 1,
141                    parent,
142                    caption_id,
143                );
144
145                cursor.index = val.end;
146                cursor.word("\n")?;
147                let child_id = loop {
148                    let c_id = parse_element(parser, cursor, parent, parse_opts)?;
149
150                    if matches!(&parser.pool[c_id].obj, Expr::Affiliated(_)) {
151                        // skip affiliated objects
152                        cursor.index = parser.pool[c_id].end;
153                        continue;
154                    }
155
156                    // HACK: normally would just do inspection and mutation in one go,
157                    // but we'd be taking a mutable/immutable ref to the parser pool.
158                    //
159                    // split it up into two (with this enum to indicate behaviour) to
160                    // work around that
161                    enum Operation {
162                        CaptionImage(NodeID),
163                        Table,
164                        None,
165                    }
166
167                    // inspection phase
168                    let operation = match &parser.pool[c_id].obj {
169                        Expr::Paragraph(par) if par.is_image(parser) => {
170                            Operation::CaptionImage(par.0[0])
171                        }
172                        Expr::Table(_) => Operation::Table,
173                        _ => Operation::None,
174                    };
175
176                    // mutation phase
177                    match operation {
178                        Operation::CaptionImage(link_id) => {
179                            if let Expr::RegularLink(link) = &mut parser.pool[link_id].obj {
180                                link.caption = Some(caption_id);
181                            }
182                        }
183                        Operation::Table => {
184                            if let Expr::Table(table) = &mut parser.pool[c_id].obj {
185                                table.caption = Some(caption_id);
186                            }
187                        }
188                        Operation::None => {
189                            // TODO: warning system
190                            dbg!("caption applied to invalid object");
191                        }
192                    }
193                    break c_id;
194                }; // caption end
195
196                return Ok(child_id);
197            }
198            _ => {}
199        }
200
201        // not mentioned in the spec, but org-element trims
202        let val = cursor.fn_until(|chr: u8| chr == b'\n')?;
203        let trimmed = val.obj.trim_ascii();
204
205        parser.keywords.insert(key_word.obj, trimmed);
206        Ok(parser.alloc(
207            Keyword {
208                key: key_word.obj,
209                val: trimmed,
210            },
211            start,
212            val.end + 1,
213            parent,
214        ))
215    }
216}
217
218#[derive(Debug, Clone, PartialEq, Eq)]
219pub struct MacroDef<'a> {
220    // Highest ArgNum
221    pub num_args: u32,
222    pub input: Vec<ArgNumOrText<'a>>,
223    pub name: &'a str,
224}
225
/// One piece of a macro body.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArgNumOrText<'a> {
    /// Literal text copied from the macro definition.
    Text(&'a str),
    /// A `$N` placeholder; the payload is the digit `N`.
    ArgNum(u32),
}
231
232impl<'a> MacroDef<'a> {
233    pub(crate) fn parse(mut cursor: Cursor<'a>) -> Result<Match<Self>> {
234        let start = cursor.index;
235        // we start just after the colon
236        // #+macro: NAME INNER
237        // INNER: words $1 is an argument $2 is another
238        cursor.skip_ws();
239        // A string starting with a alphabetic character followed by any number of
240        // alphanumeric characters, hyphens and underscores (-_).
241        if !cursor.try_curr()?.is_ascii_alphabetic() || cursor.curr() == NEWLINE {
242            return Err(MatchError::InvalidLogic);
243        }
244
245        let name_match = cursor.fn_while(|chr: u8| {
246            chr.is_ascii_alphanumeric() || chr == HYPHEN || chr == UNDERSCORE
247        })?;
248        cursor.index = name_match.end;
249
250        cursor.skip_ws();
251        // macro with no body?
252        if cursor.try_curr()? == NEWLINE {
253            return Err(MatchError::InvalidLogic);
254        }
255
256        // let inner_match = cursor.fn_until(|chr: u8| chr.is_ascii_whitespace())?;
257        let mut prev_ind = cursor.index;
258        let mut ret_vec: Vec<ArgNumOrText> = Vec::new();
259        let mut num_args = 0;
260        loop {
261            match cursor.try_curr()? {
262                DOLLAR => {
263                    if cursor.peek(1)?.is_ascii_digit() {
264                        ret_vec.push(ArgNumOrText::Text(cursor.clamp_backwards(prev_ind)));
265                        // TODO: only supports 9 args rn
266                        // parse numbers
267
268                        let arg_ident = (cursor.peek(1)? - 48) as u32;
269                        num_args = num_args.max(arg_ident);
270                        ret_vec.push(ArgNumOrText::ArgNum(arg_ident));
271                        // skip past dollar and number
272                        cursor.index += 2;
273                        prev_ind = cursor.index;
274                    } else {
275                        cursor.next();
276                    }
277                }
278                NEWLINE => {
279                    ret_vec.push(ArgNumOrText::Text(cursor.clamp_backwards(prev_ind)));
280                    break;
281                }
282                _ => {
283                    cursor.next();
284                }
285            }
286        }
287
288        Ok(Match {
289            start,
290            end: cursor.index + 1,
291            obj: Self {
292                input: ret_vec,
293                num_args,
294                name: name_match.obj,
295            },
296        })
297    }
298}
299
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use crate::{
        element::{Affiliated, Keyword},
        expr_in_pool, node_in_pool, parse_org,
        types::Expr,
    };

    /// A keyword with no whitespace around the value.
    #[test]
    fn basic_keyword() {
        let parsed = parse_org("#+key:val\n");

        let found = parsed
            .pool
            .iter()
            .find_map(|node| match node.obj {
                Expr::Keyword(kw) => Some(kw),
                _ => None,
            })
            .unwrap();

        let expected = Keyword {
            key: "key",
            val: "val",
        };
        assert_eq!(found, expected)
    }

    /// Spaces and tabs between the colon and the value are dropped.
    #[test]
    fn keyword_ignore_space() {
        let parsed = parse_org("#+key:                \t    \t              val\n");

        let found = parsed
            .pool
            .iter()
            .find_map(|node| match node.obj {
                Expr::Keyword(kw) => Some(kw),
                _ => None,
            })
            .unwrap();

        let expected = Keyword {
            key: "key",
            val: "val",
        };
        assert_eq!(found, expected)
    }

    /// The value stops at the end of the line; text on the next line is not
    /// part of it.
    #[test]
    fn keyword_ignore_space_nl() {
        let parsed = parse_org("#+key:     \nval\n");

        let found = expr_in_pool!(parsed, Keyword).unwrap();

        let expected = Keyword {
            key: "key",
            val: "",
        };
        assert_eq!(found, &expected)
    }

    /// Covers runs of spaces between pairs, an attribute with an empty
    /// value, a multi-word value, and several attributes on one line.
    #[test]
    fn attr_backend_affiliated_keyword() {
        let input = r"
#+attr_html: :black yes        :class :words    multiple spaces accepted
|table
";
        let parsed = parse_org(input);
        let html_attrs = &node_in_pool!(parsed, Table).unwrap().attrs["html"];

        let expected = HashMap::from([
            ("black", "yes"),
            ("class", ""),
            ("words", "multiple spaces accepted"),
        ]);
        assert_eq!(html_attrs, &expected);
    }

    /// The caption text is parsed as a paragraph with inline markup, and the
    /// captioned link is still parsed.
    #[test]
    fn caption_with_children() {
        let input = r#"

#+caption:*hi*
[[yeah]]

"#;

        let parsed = parse_org(input);
        let cap = expr_in_pool!(parsed, Affiliated).unwrap();

        let Affiliated::Caption(id) = cap else {
            panic!("oops")
        };

        let Expr::Paragraph(para) = &parsed.pool[*id].obj else {
            unreachable!()
        };
        let Expr::Bold(bold_obj) = &parsed.pool[para.0[0]].obj else {
            unreachable!()
        };
        let Expr::Plain(letters) = &parsed.pool[bold_obj.0[0]].obj else {
            unreachable!()
        };
        assert_eq!(letters, &"hi");

        // Only the presence of the link matters here.
        let _link = expr_in_pool!(parsed, RegularLink).unwrap();
    }

    /// A `#+NAME:` registers a target that a later link can refer to.
    #[test]
    fn affiliated_name() {
        let input = r"

#+CAPTION: this is a list
#+NAME: yes_my_list
- yes

#+name: yes_my_list
[[yes_my_list]]
";

        let parsed = parse_org(input);

        let target = parsed.targets.get("yes_my_list").unwrap();
        assert_eq!(target, &"yes_my_list".into());

        let occurences = parsed.target_occurences.get("yes_my_list").unwrap();
        assert_eq!(occurences, &1);
        // parsed.print_tree();
    }

    /// Macro lines cut short by end of input must parse without panicking.
    #[test]
    fn macro_eof() {
        let inputs = [
            r"#+macro:",
            r"#+macro: name",
            r"#+macro: name ",
            r"#+macro: name thing",
        ];
        for inp in inputs {
            parse_org(inp);
        }
    }
}
465}