xml/
xml.rs

1//! This is an example of an XML parser implemented with `shrimple_parser`
2
3use {
4    core::fmt::{Display, Formatter},
5    shrimple_parser::{
6        from_tuple, match_out, parse_whitespace,
7        pattern::{parse, parse_until, parse_until_ex, NotEscaped},
8        ready, Input, Parser, ParsingError, ParsingResult,
9    },
10    std::env::args,
11};
12
/// The reasons for which parsing of an XML fragment can fail.
///
/// The type is a plain field-less enum, so it is cheap to copy and can be compared directly,
/// e.g. in assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Error {
    /// A tag was not terminated with `>`.
    TagUnclosed,
    /// An attribute's `=` was not followed by a quoted value.
    NoAttrValue,
    /// A string was not terminated with `"`.
    UnclosedString,
}

impl Display for Error {
    /// Writes the human-readable description of the error.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let message = match self {
            Self::TagUnclosed => "expected the tag closed with `>`",
            Self::NoAttrValue => "expected an attribute value enclosed in quotes",
            Self::UnclosedString => "expected the string closed with `\"`",
        };
        f.write_str(message)
    }
}

// Lets the error be used wherever a standard error is expected, e.g. as `Box<dyn Error>`.
impl std::error::Error for Error {}
29
/// An attribute of an opening tag.
///
/// `In` is the type of the parsed input the name & the value are pieces of.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Attr<In> {
    /// The name of the attribute.
    name: In,
    /// The value of the attribute, `None` if the attribute has no `= "..."` part.
    value: Option<In>,
}

/// A single piece of an XML document.
///
/// `In` is the type of the parsed input that names & text are pieces of.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Fragment<In> {
    /// An opening tag, e.g. `<name attr="value">` or `<name />`.
    Tag {
        /// Whether a `/` was found right before the closing `>`.
        self_closing: bool,
        /// The name of the tag.
        name: In,
        /// The attributes of the tag, in the order they were parsed.
        attrs: Vec<Attr<In>>,
    },
    /// A closing tag, e.g. `</name>`.
    ClosingTag {
        /// The name of the tag being closed.
        name: In,
    },
    /// Input found outside of tags.
    Text(In),
}
48
49fn parse_ident<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
50    parse_until(|c| ['>', '/', '='].contains(&c) || c.is_whitespace())
51        .filter(|i: &In| !i.is_empty())
52        .parse(input)
53}
54
55fn parse_string<In: Input>(input: In) -> ParsingResult<In, In, Error> {
56    parse('"')
57        .then(parse_until_ex(NotEscaped('\\', '"')).or_reason(Error::UnclosedString))
58        .parse(input)
59}
60
61fn parse_attr<In: Input>(input: In) -> ParsingResult<In, Attr<In>, Error> {
62    parse_ident
63        .skip(parse_whitespace)
64        .and(
65            parse('=')
66                .skip(parse_whitespace)
67                .then(parse_string.or_reason(Error::NoAttrValue))
68                .skip(parse_whitespace)
69                .maybe(),
70        )
71        .map_out(from_tuple!(Attr { name, value }))
72        .parse(input)
73}
74
75fn parse_tag<In: Input>(input: In) -> ParsingResult<In, Fragment<In>, Error> {
76    parse_whitespace::<In, Error>
77        .then(parse('<'))
78        .then(parse_whitespace)
79        .then(parse('/').ok())
80        .skip(parse_whitespace)
81        .and(parse_ident)
82        .skip(parse_whitespace)
83        .map(match_out! {
84            (true, name) => ready(Fragment::ClosingTag { name }),
85            (false, name) => parse_attr
86                .collect()
87                .and(parse('/').ok())
88                .skip(parse_whitespace)
89                .map_out(|(attrs, self_closing)| Fragment::Tag { self_closing, name: name.clone(), attrs })
90        })
91        .skip(parse_whitespace)
92        .skip(parse('>').or_reason(Error::TagUnclosed))
93        .parse(input)
94}
95
96fn xml_fragments<In: Input>(
97    input: In,
98) -> impl Iterator<Item = Result<Fragment<In>, ParsingError<In, Error>>> {
99    parse_tag
100        .or_nonempty(parse_until('<').map_out(Fragment::Text))
101        .iter(input)
102}
103
104// TODO: analog to `.with_source_line()` without FS access
105fn main() {
106    let input = args().nth(1).expect("XML input");
107    for fragment in xml_fragments(&*input) {
108        match fragment {
109            Ok(fragment) => println!("{fragment:?}"),
110            Err(e) => eprintln!("{}", e.with_src_loc("<input>", &input)),
111        }
112    }
113}