xrust_md/
md.rs

1//! A markdown parser for Xrust
2
3#![allow(dead_code)]
4
5use qualname::{NamespacePrefix, NamespaceUri, NcName, QName};
6use std::rc::Rc;
7use xrust::item::Node;
8use xrust::parser::combinators::alt::{alt2, alt4};
9use xrust::parser::combinators::list::separated_list0;
10use xrust::parser::combinators::many::{many0, many1};
11use xrust::parser::combinators::map::map;
12use xrust::parser::combinators::tag::tag;
13use xrust::parser::combinators::tuple::tuple3;
14use xrust::parser::combinators::whitespace::whitespace0;
15use xrust::parser::{ParseError, ParseInput, ParserStateBuilder, StaticState, StaticStateBuilder};
16use xrust::trees::smite::RNode;
17use xrust::value::Value;
18use xrust::xdmerror::{Error, ErrorKind};
19
20pub fn parse(input: &str) -> Result<RNode, Error> {
21    let d = RNode::new_document();
22    if input.is_empty() {
23        return Ok(d);
24    }
25    let state = ParserStateBuilder::new().doc(d.clone()).build();
26    let mut static_state = StaticStateBuilder::new()
27        .namespace(|_| Err(ParseError::Notimplemented))
28        .build();
29    md_expr((input.trim(), state), &mut static_state)
30        .map(|_| d)
31        .map_err(|e| match e {
32            ParseError::Combinator(_s) => Error::new(
33                ErrorKind::ParseError,
34                String::from("unrecoverable parse error"),
35            ),
36            ParseError::NotWellFormed(f) => Error::new(
37                ErrorKind::ParseError,
38                format!("unrecognised extra characters \"{}\"", f),
39            ),
40            _ => Error::new(ErrorKind::Unknown, String::from("unknown error")),
41        })
42}
43
44fn md_expr<'a, N: Node, L>(
45    input: ParseInput<'a, N>,
46    ss: &mut StaticState<L>,
47) -> Result<(ParseInput<'a, N>, N), ParseError>
48where
49    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
50{
51    match document()(input, ss) {
52        Err(e) => Err(e),
53        Ok(((input1, state1), n)) => {
54            if input1.is_empty() {
55                Ok(((input1, state1), n))
56            } else {
57                Err(ParseError::NotWellFormed(input1.to_string()))
58            }
59        }
60    }
61}
62
63fn document<'a, N: Node, L>()
64-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, N), ParseError>
65where
66    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
67{
68    move |input, ss| match separated_list0(map(many1(tag("\n")), |_| ()), block())(input, ss) {
69        Ok(((input1, state1), v)) => {
70            let mut doc = state1.doc().unwrap();
71            let mut top = doc
72                .new_element(QName::from_local_name(NcName::try_from("article").unwrap()))
73                .expect("unable to create element");
74            v.iter()
75                .for_each(|c| top.push(c.clone()).expect("unable to add node"));
76            doc.push(top.clone()).expect("unable to add node");
77            Ok(((input1, state1.clone()), top))
78        }
79        Err(err) => Err(err),
80    }
81}
82
83fn block<'a, N: Node, L>()
84-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, N), ParseError>
85where
86    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
87{
88    alt2(heading(), para())
89}
90
91fn heading<'a, N: Node, L>()
92-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, N), ParseError>
93where
94    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
95{
96    move |input, ss| match tuple3(many1(tag("#")), whitespace0(), phrases())(input, ss) {
97        Ok(((input1, state1), (h, _, v))) => {
98            let hd_name = format!("heading{}", h.len());
99            let mut h = state1
100                .doc()
101                .unwrap()
102                .new_element(QName::from_local_name(
103                    NcName::try_from(hd_name.as_str()).unwrap(),
104                ))
105                .expect("unable to create element");
106            v.iter()
107                .for_each(|c| h.push(c.clone()).expect("unable to add node"));
108            Ok(((input1, state1), h))
109        }
110        Err(err) => Err(err),
111    }
112}
113
114fn strong<'a, N: Node, L>()
115-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, N), ParseError>
116where
117    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
118{
119    move |input, ss| match tuple3(
120        tag("**"),
121        map(many0(none_of("*")), |v| v.iter().collect::<String>()),
122        tag("**"),
123    )(input, ss)
124    {
125        Ok(((input1, state1), (_, c, _))) => {
126            let mut s = state1
127                .doc()
128                .unwrap()
129                .new_element(QName::from_local_name(NcName::try_from("emph").unwrap()))
130                .expect("unable to create element");
131            let att = state1
132                .doc()
133                .unwrap()
134                .new_attribute(
135                    QName::from_local_name(NcName::try_from("role").unwrap()),
136                    Rc::new(Value::from("strong")),
137                )
138                .expect("unable to create attribute");
139            s.add_attribute(att).expect("unable to add attribute");
140            let content = state1
141                .doc()
142                .unwrap()
143                .new_text(Rc::new(Value::from(c)))
144                .expect("unable to create text node");
145            s.push(content).expect("unable to add node");
146            Ok(((input1, state1), s))
147        }
148        Err(err) => Err(err),
149    }
150}
151fn underline<'a, N: Node, L>()
152-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, N), ParseError>
153where
154    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
155{
156    move |input, ss| match tuple3(
157        tag("__"),
158        map(many1(none_of("_")), |v| v.iter().collect::<String>()),
159        tag("__"),
160    )(input, ss)
161    {
162        Ok(((input1, state1), (_, c, _))) => {
163            let mut s = state1
164                .doc()
165                .unwrap()
166                .new_element(QName::from_local_name(NcName::try_from("emph").unwrap()))
167                .expect("unable to create element");
168            let att = state1
169                .doc()
170                .unwrap()
171                .new_attribute(
172                    QName::from_local_name(NcName::try_from("role").unwrap()),
173                    Rc::new(Value::from("underline")),
174                )
175                .expect("unable to create attribute");
176            s.add_attribute(att).expect("unable to add attribute");
177            let content = state1
178                .doc()
179                .unwrap()
180                .new_text(Rc::new(Value::from(c)))
181                .expect("unable to create text node");
182            s.push(content).expect("unable to add text node");
183            Ok(((input1, state1), s))
184        }
185        Err(err) => Err(err),
186    }
187}
188fn emphasis<'a, N: Node, L>()
189-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, N), ParseError>
190where
191    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
192{
193    move |input, ss| match tuple3(
194        tag("//"),
195        map(many1(none_of("/")), |v| v.iter().collect::<String>()),
196        tag("//"),
197    )(input, ss)
198    {
199        Ok(((input1, state1), (_, c, _))) => {
200            let mut s = state1
201                .doc()
202                .unwrap()
203                .new_element(QName::from_local_name(NcName::try_from("emph").unwrap()))
204                .expect("unable to create element");
205            let content = state1
206                .doc()
207                .unwrap()
208                .new_text(Rc::new(Value::from(c)))
209                .expect("unable to create text node");
210            s.push(content).expect("unable to add text node");
211            Ok(((input1, state1), s))
212        }
213        Err(err) => Err(err),
214    }
215}
216fn phrases<'a, N: Node, L>()
217-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, Vec<N>), ParseError>
218where
219    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
220{
221    many1(alt4(text_content(), strong(), emphasis(), underline()))
222}
223
224fn text_content<'a, N: Node, L>()
225-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, N), ParseError>
226where
227    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
228{
229    move |input, ss| match map(many1(none_of("*/_\n")), |v| v.iter().collect::<String>())(input, ss)
230    {
231        Ok(((input1, state1), t)) => {
232            let p = state1
233                .doc()
234                .unwrap()
235                .new_text(Rc::new(Value::from(t)))
236                .expect("unable to create text node");
237            Ok(((input1, state1), p))
238        }
239        Err(err) => Err(err),
240    }
241}
242
243fn para<'a, N: Node, L>()
244-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, N), ParseError>
245where
246    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
247{
248    move |input, ss| match phrases()(input, ss) {
249        Ok(((input1, state1), v)) => {
250            let mut p = state1
251                .doc()
252                .unwrap()
253                .new_element(QName::from_local_name(NcName::try_from("para").unwrap()))
254                .expect("unable to create element");
255            v.iter()
256                .for_each(|c| p.push(c.clone()).expect("unable to add node"));
257            Ok(((input1, state1), p))
258        }
259        Err(err) => Err(err),
260    }
261}
262
263#[allow(dead_code)]
264fn eol<'a, N: Node, L>()
265-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
266where
267    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
268{
269    map(many1(tag("\n")), |_| ())
270}
271
272// This is copied from xrust::parser::xpath::support
273// TODO: make it a public function exported by xrust
274fn none_of<'a, N: Node, L>(
275    s: &str,
276) -> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, char), ParseError> + '_
277where
278    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
279{
280    move |(input, state), _ss| {
281        if input.is_empty() {
282            Err(ParseError::Combinator(String::from("no input")))
283        } else {
284            let mut ch_it = input.char_indices();
285            let (_, a) = ch_it.next().unwrap();
286            match s.find(|b| a == b) {
287                Some(_) => Err(ParseError::Combinator(String::from("found char"))),
288                None => {
289                    if let Some((j, _)) = ch_it.next() {
290                        Ok(((&input[j..], state), a))
291                    } else {
292                        Ok((("", state), a))
293                    }
294                }
295            }
296        }
297    }
298}
299
#[cfg(test)]
mod tests {
    use super::*;

    /// Headings, paragraphs and each of the three inline styles in one input.
    #[test]
    fn parse_1() {
        // `\n\` keeps the newline and drops the indentation of the next line.
        let markdown = "# Heading\n\
                        A paragraph\n\
                        ## Level 2\n\
                        Some **strong** text\n\
                        # Level 1\n\
                        Some __underlined__ text\n\
                        ## Another Level 2\n\
                        //Emphasised// text";
        let expected = "<article><heading1>Heading</heading1><para>A paragraph</para><heading2>Level 2</heading2><para>Some <emph role='strong'>strong</emph> text</para><heading1>Level 1</heading1><para>Some <emph role='underline'>underlined</emph> text</para><heading2>Another Level 2</heading2><para><emph>Emphasised</emph> text</para></article>";

        let doc = parse(markdown).expect("unable to parse MarkDown");

        assert_eq!(doc.to_xml(), expected);
    }

    /// Heading levels one to four; the input ends with a trailing newline.
    #[test]
    fn headings_1() {
        let markdown = "# Level 1 Heading\n\
                        ## Level 2 Heading\n\
                        ### Level 3 Heading\n\
                        #### Level 4 Heading\n";
        let expected = "<article><heading1>Level 1 Heading</heading1><heading2>Level 2 Heading</heading2><heading3>Level 3 Heading</heading3><heading4>Level 4 Heading</heading4></article>";

        let doc = parse(markdown).expect("unable to parse MarkDown");

        assert_eq!(doc.to_xml(), expected);
    }
}