xrust_md/
md.rs

1//! A markdown parser for Xrust
2
3#![allow(dead_code)]
4
5use std::rc::Rc;
6use xrust::item::Node;
7use xrust::parser::combinators::alt::{alt2, alt4};
8use xrust::parser::combinators::list::separated_list0;
9use xrust::parser::combinators::many::{many0, many1};
10use xrust::parser::combinators::map::map;
11use xrust::parser::combinators::tag::tag;
12use xrust::parser::combinators::tuple::tuple3;
13use xrust::parser::combinators::whitespace::whitespace0;
14use xrust::parser::{ParseError, ParseInput, ParserState};
15use xrust::trees::smite::RNode;
16use xrust::value::Value;
17use xrust::xdmerror::{Error, ErrorKind};
18
19pub fn parse(input: &str) -> Result<RNode, Error> {
20    let d = RNode::new_document();
21    if input.is_empty() {
22        return Ok(d);
23    }
24    let state = ParserState::new(Some(d.clone()), None, None);
25    md_expr((input.trim(), state))
26        .map(|_| d)
27        .map_err(|e| match e {
28            ParseError::Combinator => Error::new(
29                ErrorKind::ParseError,
30                String::from("unrecoverable parse error"),
31            ),
32            ParseError::NotWellFormed(f) => Error::new(
33                ErrorKind::ParseError,
34                format!("unrecognised extra characters \"{}\"", f),
35            ),
36            _ => Error::new(ErrorKind::Unknown, String::from("unknown error")),
37        })
38}
39
40fn md_expr<N: Node>(input: ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
41    match document()(input) {
42        Err(e) => Err(e),
43        Ok(((input1, state1), n)) => {
44            if input1.is_empty() {
45                Ok(((input1, state1), n))
46            } else {
47                Err(ParseError::NotWellFormed(input1.to_string()))
48            }
49        }
50    }
51}
52
53fn document<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
54    move |input| match separated_list0(map(many1(tag("\n")), |_| ()), block())(input) {
55        Ok(((input1, state1), v)) => {
56            let mut doc = state1.doc().unwrap();
57            let mut top = doc
58                .new_element(state1.get_qualified_name(
59                    None,
60                    None,
61                    state1.get_value(&"article".to_string()),
62                ))
63                .expect("unable to create element");
64            v.iter()
65                .for_each(|c| top.push(c.clone()).expect("unable to add node"));
66            doc.push(top.clone()).expect("unable to add node");
67            Ok(((input1, state1.clone()), top))
68        }
69        Err(err) => Err(err),
70    }
71}
72
73fn block<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
74    alt2(heading(), para())
75}
76
77fn heading<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
78    alt2(heading2(), heading1())
79}
80
81fn heading1<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
82    move |input| match tuple3(tag("#"), whitespace0(), phrases())(input) {
83        Ok(((input1, state1), (_, _, v))) => {
84            let mut h = state1
85                .doc()
86                .unwrap()
87                .new_element(state1.get_qualified_name(
88                    None,
89                    None,
90                    state1.get_value(&"heading1".to_string()),
91                ))
92                .expect("unable to create element");
93
94            v.iter()
95                .for_each(|c| h.push(c.clone()).expect("unable to add node"));
96            Ok(((input1, state1), h))
97        }
98        Err(err) => Err(err),
99    }
100}
101fn heading2<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
102    move |input| match tuple3(tag("##"), whitespace0(), phrases())(input) {
103        Ok(((input1, state1), (_, _, v))) => {
104            let mut h = state1
105                .doc()
106                .unwrap()
107                .new_element(state1.get_qualified_name(
108                    None,
109                    None,
110                    state1.get_value(&"heading2".to_string()),
111                ))
112                .expect("unable to create element");
113            v.iter()
114                .for_each(|c| h.push(c.clone()).expect("unable to add node"));
115            Ok(((input1, state1), h))
116        }
117        Err(err) => Err(err),
118    }
119}
120
121fn strong<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
122    move |input| match tuple3(
123        tag("**"),
124        map(many0(none_of("*")), |v| v.iter().collect::<String>()),
125        tag("**"),
126    )(input)
127    {
128        Ok(((input1, state1), (_, c, _))) => {
129            let mut s = state1
130                .doc()
131                .unwrap()
132                .new_element(state1.get_qualified_name(
133                    None,
134                    None,
135                    state1.get_value(&"emph".to_string()),
136                ))
137                .expect("unable to create element");
138            let att = state1
139                .doc()
140                .unwrap()
141                .new_attribute(
142                    state1.get_qualified_name(None, None, state1.get_value(&"role".to_string())),
143                    state1.get_value(&"strong".to_string()),
144                )
145                .expect("unable to create attribute");
146            s.add_attribute(att).expect("unable to add attribute");
147            let content = state1
148                .doc()
149                .unwrap()
150                .new_text(Rc::new(Value::from(c)))
151                .expect("unable to create text node");
152            s.push(content).expect("unable to add node");
153            Ok(((input1, state1), s))
154        }
155        Err(err) => Err(err),
156    }
157}
158fn underline<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
159    move |input| match tuple3(
160        tag("__"),
161        map(many1(none_of("_")), |v| v.iter().collect::<String>()),
162        tag("__"),
163    )(input)
164    {
165        Ok(((input1, state1), (_, c, _))) => {
166            let mut s = state1
167                .doc()
168                .unwrap()
169                .new_element(state1.get_qualified_name(
170                    None,
171                    None,
172                    state1.get_value(&"emph".to_string()),
173                ))
174                .expect("unable to create element");
175            let att = state1
176                .doc()
177                .unwrap()
178                .new_attribute(
179                    state1.get_qualified_name(None, None, state1.get_value(&"role".to_string())),
180                    state1.get_value(&"underline".to_string()),
181                )
182                .expect("unable to create attribute");
183            s.add_attribute(att).expect("unable to add attribute");
184            let content = state1
185                .doc()
186                .unwrap()
187                .new_text(Rc::new(Value::from(c)))
188                .expect("unable to create text node");
189            s.push(content).expect("unable to add text node");
190            Ok(((input1, state1), s))
191        }
192        Err(err) => Err(err),
193    }
194}
195fn emphasis<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
196    move |input| match tuple3(
197        tag("//"),
198        map(many1(none_of("/")), |v| v.iter().collect::<String>()),
199        tag("//"),
200    )(input)
201    {
202        Ok(((input1, state1), (_, c, _))) => {
203            let mut s = state1
204                .doc()
205                .unwrap()
206                .new_element(state1.get_qualified_name(
207                    None,
208                    None,
209                    state1.get_value(&"emph".to_string()),
210                ))
211                .expect("unable to create element");
212            let content = state1
213                .doc()
214                .unwrap()
215                .new_text(Rc::new(Value::from(c)))
216                .expect("unable to create text node");
217            s.push(content).expect("unable to add text node");
218            Ok(((input1, state1), s))
219        }
220        Err(err) => Err(err),
221    }
222}
223fn phrases<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, Vec<N>), ParseError> {
224    many1(alt4(text_content(), strong(), emphasis(), underline()))
225}
226
227fn text_content<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
228    move |input| match map(many1(none_of("*/_\n")), |v| v.iter().collect::<String>())(input) {
229        Ok(((input1, state1), t)) => {
230            let p = state1
231                .doc()
232                .unwrap()
233                .new_text(Rc::new(Value::from(t)))
234                .expect("unable to create text node");
235            Ok(((input1, state1), p))
236        }
237        Err(err) => Err(err),
238    }
239}
240
241fn para<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, N), ParseError> {
242    move |input| match phrases()(input) {
243        Ok(((input1, state1), v)) => {
244            let mut p = state1
245                .doc()
246                .unwrap()
247                .new_element(state1.get_qualified_name(
248                    None,
249                    None,
250                    state1.get_value(&"para".to_string()),
251                ))
252                .expect("unable to create element");
253            v.iter()
254                .for_each(|c| p.push(c.clone()).expect("unable to add node"));
255            Ok(((input1, state1), p))
256        }
257        Err(err) => Err(err),
258    }
259}
260
261#[allow(dead_code)]
262fn eol<N: Node>() -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, ()), ParseError> {
263    map(many1(tag("\n")), |_| ())
264}
265
266// This is copied from xrust::parser::xpath::support
267// TODO: make it a public function exported by xrust
268fn none_of<N: Node>(
269    s: &str,
270) -> impl Fn(ParseInput<N>) -> Result<(ParseInput<N>, char), ParseError> + '_ {
271    move |(input, state)| {
272        if input.is_empty() {
273            Err(ParseError::Combinator)
274        } else {
275            let a = input.chars().next().unwrap();
276            match s.find(|b| a == b) {
277                Some(_) => Err(ParseError::Combinator),
278                None => Ok(((&input[1..], state), a)),
279            }
280        }
281    }
282}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a document that mixes both heading levels with paragraphs
    /// containing strong, underlined and emphasised text, and checks the
    /// XML serialisation of the resulting tree.
    #[test]
    fn parse_1() {
        let markdown = "# Heading
A paragraph
## Level 2
Some **strong** text
# Level 1
Some __underlined__ text
## Another Level 2
//Emphasised// text";
        let expected = "<article><heading1>Heading</heading1><para>A paragraph</para><heading2>Level 2</heading2><para>Some <emph role='strong'>strong</emph> text</para><heading1>Level 1</heading1><para>Some <emph role='underline'>underlined</emph> text</para><heading2>Another Level 2</heading2><para><emph>Emphasised</emph> text</para></article>";

        let doc = parse(markdown).expect("unable to parse MarkDown");
        assert_eq!(doc.to_xml(), expected)
    }
}