xrust_md/
md.rs

1//! A markdown parser for Xrust
2
3#![allow(dead_code)]
4
5use std::rc::Rc;
6use xrust::item::Node;
7use xrust::parser::combinators::alt::{alt2, alt4};
8use xrust::parser::combinators::list::separated_list0;
9use xrust::parser::combinators::many::{many0, many1};
10use xrust::parser::combinators::map::map;
11use xrust::parser::combinators::tag::tag;
12use xrust::parser::combinators::tuple::tuple3;
13use xrust::parser::combinators::whitespace::whitespace0;
14use xrust::parser::{ParseError, ParseInput, ParserState};
15use xrust::trees::smite::RNode;
16use xrust::value::Value;
17use xrust::xdmerror::{Error, ErrorKind};
18
19pub fn parse(input: &str) -> Result<RNode, Error> {
20    let d = RNode::new_document();
21    if input.is_empty() {
22        return Ok(d);
23    }
24    let state = ParserState::new(Some(d.clone()), None, None);
25    md_expr((input.trim(), state))
26        .map(|_| d)
27        .map_err(|e| match e {
28            ParseError::Combinator => Error::new(
29                ErrorKind::ParseError,
30                String::from("unrecoverable parse error"),
31            ),
32            ParseError::NotWellFormed(f) => Error::new(
33                ErrorKind::ParseError,
34                format!("unrecognised extra characters \"{}\"", f),
35            ),
36            _ => Error::new(ErrorKind::Unknown, String::from("unknown error")),
37        })
38}
39
40fn md_expr<N: Node>(input: ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
41    match document()(input) {
42        Err(e) => Err(e),
43        Ok(((input1, state1), n)) => {
44            if input1.is_empty() {
45                Ok(((input1, state1), n))
46            } else {
47                Err(ParseError::NotWellFormed(input1.to_string()))
48            }
49        }
50    }
51}
52
53fn document<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
54    move |input| match separated_list0(map(many1(tag("\n")), |_| ()), block())(input) {
55        Ok(((input1, state1), v)) => {
56            let mut doc = state1.doc().unwrap();
57            let mut top = doc
58                .new_element(state1.get_qualified_name(
59                    None,
60                    None,
61                    state1.get_value(&"article".to_string()),
62                ))
63                .expect("unable to create element");
64            v.iter()
65                .for_each(|c| top.push(c.clone()).expect("unable to add node"));
66            doc.push(top.clone()).expect("unable to add node");
67            Ok(((input1, state1.clone()), top))
68        }
69        Err(err) => Err(err),
70    }
71}
72
73fn block<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
74    alt2(heading(), para())
75}
76
77fn heading<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
78    move |input| match tuple3(many1(tag("#")), whitespace0(), phrases())(input) {
79        Ok(((input1, state1), (h, _, v))) => {
80            let hd_name = format!("heading{}", h.len());
81            let mut h = state1
82                .doc()
83                .unwrap()
84                .new_element(state1.get_qualified_name(None, None, state1.get_value(&hd_name)))
85                .expect("unable to create element");
86            v.iter()
87                .for_each(|c| h.push(c.clone()).expect("unable to add node"));
88            Ok(((input1, state1), h))
89        }
90        Err(err) => Err(err),
91    }
92}
93
94fn strong<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
95    move |input| match tuple3(
96        tag("**"),
97        map(many0(none_of("*")), |v| v.iter().collect::<String>()),
98        tag("**"),
99    )(input)
100    {
101        Ok(((input1, state1), (_, c, _))) => {
102            let mut s = state1
103                .doc()
104                .unwrap()
105                .new_element(state1.get_qualified_name(
106                    None,
107                    None,
108                    state1.get_value(&"emph".to_string()),
109                ))
110                .expect("unable to create element");
111            let att = state1
112                .doc()
113                .unwrap()
114                .new_attribute(
115                    state1.get_qualified_name(None, None, state1.get_value(&"role".to_string())),
116                    state1.get_value(&"strong".to_string()),
117                )
118                .expect("unable to create attribute");
119            s.add_attribute(att).expect("unable to add attribute");
120            let content = state1
121                .doc()
122                .unwrap()
123                .new_text(Rc::new(Value::from(c)))
124                .expect("unable to create text node");
125            s.push(content).expect("unable to add node");
126            Ok(((input1, state1), s))
127        }
128        Err(err) => Err(err),
129    }
130}
131fn underline<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
132    move |input| match tuple3(
133        tag("__"),
134        map(many1(none_of("_")), |v| v.iter().collect::<String>()),
135        tag("__"),
136    )(input)
137    {
138        Ok(((input1, state1), (_, c, _))) => {
139            let mut s = state1
140                .doc()
141                .unwrap()
142                .new_element(state1.get_qualified_name(
143                    None,
144                    None,
145                    state1.get_value(&"emph".to_string()),
146                ))
147                .expect("unable to create element");
148            let att = state1
149                .doc()
150                .unwrap()
151                .new_attribute(
152                    state1.get_qualified_name(None, None, state1.get_value(&"role".to_string())),
153                    state1.get_value(&"underline".to_string()),
154                )
155                .expect("unable to create attribute");
156            s.add_attribute(att).expect("unable to add attribute");
157            let content = state1
158                .doc()
159                .unwrap()
160                .new_text(Rc::new(Value::from(c)))
161                .expect("unable to create text node");
162            s.push(content).expect("unable to add text node");
163            Ok(((input1, state1), s))
164        }
165        Err(err) => Err(err),
166    }
167}
168fn emphasis<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
169    move |input| match tuple3(
170        tag("//"),
171        map(many1(none_of("/")), |v| v.iter().collect::<String>()),
172        tag("//"),
173    )(input)
174    {
175        Ok(((input1, state1), (_, c, _))) => {
176            let mut s = state1
177                .doc()
178                .unwrap()
179                .new_element(state1.get_qualified_name(
180                    None,
181                    None,
182                    state1.get_value(&"emph".to_string()),
183                ))
184                .expect("unable to create element");
185            let content = state1
186                .doc()
187                .unwrap()
188                .new_text(Rc::new(Value::from(c)))
189                .expect("unable to create text node");
190            s.push(content).expect("unable to add text node");
191            Ok(((input1, state1), s))
192        }
193        Err(err) => Err(err),
194    }
195}
196fn phrases<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, Vec<N>), ParseError> {
197    many1(alt4(text_content(), strong(), emphasis(), underline()))
198}
199
200fn text_content<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
201    move |input| match map(many1(none_of("*/_\n")), |v| v.iter().collect::<String>())(input) {
202        Ok(((input1, state1), t)) => {
203            let p = state1
204                .doc()
205                .unwrap()
206                .new_text(Rc::new(Value::from(t)))
207                .expect("unable to create text node");
208            Ok(((input1, state1), p))
209        }
210        Err(err) => Err(err),
211    }
212}
213
214fn para<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
215    move |input| match phrases()(input) {
216        Ok(((input1, state1), v)) => {
217            let mut p = state1
218                .doc()
219                .unwrap()
220                .new_element(state1.get_qualified_name(
221                    None,
222                    None,
223                    state1.get_value(&"para".to_string()),
224                ))
225                .expect("unable to create element");
226            v.iter()
227                .for_each(|c| p.push(c.clone()).expect("unable to add node"));
228            Ok(((input1, state1), p))
229        }
230        Err(err) => Err(err),
231    }
232}
233
234#[allow(dead_code)]
235fn eol<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, ()), ParseError> {
236    map(many1(tag("\n")), |_| ())
237}
238
239// This is copied from xrust::parser::xpath::support
240// TODO: make it a public function exported by xrust
241fn none_of<N: Node>(
242    s: &str,
243) -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, char), ParseError> + '_ {
244    move |(input, state)| {
245        if input.is_empty() {
246            Err(ParseError::Combinator)
247        } else {
248            let mut ch_it = input.char_indices();
249            let (_, a) = ch_it.next().unwrap();
250            match s.find(|b| a == b) {
251                Some(_) => Err(ParseError::Combinator),
252                None => {
253                    if let Some((j, _)) = ch_it.next() {
254                        Ok(((&input[j..], state), a))
255                    } else {
256                        Ok((("", state), a))
257                    }
258                }
259            }
260        }
261    }
262}
263
// Tests that run `parse` on small Markdown inputs and compare the XML
// serialisation of the resulting document against a fixed string.
#[cfg(test)]
mod tests {
    use super::*;

    // Mixes level-1 and level-2 headings with paragraphs that contain
    // `**strong**`, `__underlined__` and `//Emphasised//` inline markup.
    // Lines are separated by single newlines.
    #[test]
    fn parse_1() {
        let d = parse(
            "# Heading
A paragraph
## Level 2
Some **strong** text
# Level 1
Some __underlined__ text
## Another Level 2
//Emphasised// text",
        )
        .expect("unable to parse MarkDown");
        assert_eq!(
            d.to_xml(),
            "<article><heading1>Heading</heading1><para>A paragraph</para><heading2>Level 2</heading2><para>Some <emph role='strong'>strong</emph> text</para><heading1>Level 1</heading1><para>Some <emph role='underline'>underlined</emph> text</para><heading2>Another Level 2</heading2><para><emph>Emphasised</emph> text</para></article>"
        )
    }

    // Headings with one to four `#` characters map to `heading1`..`heading4`.
    // The input ends with a newline; `parse` trims its input before parsing.
    #[test]
    fn headings_1() {
        let d = parse(
            "# Level 1 Heading
## Level 2 Heading
### Level 3 Heading
#### Level 4 Heading
",
        )
        .expect("unable to parse MarkDown");
        assert_eq!(
            d.to_xml(),
            "<article><heading1>Level 1 Heading</heading1><heading2>Level 2 Heading</heading2><heading3>Level 3 Heading</heading3><heading4>Level 4 Heading</heading4></article>"
        )
    }
}