Skip to main content

xrust/parser/xml/
mod.rs

1mod attribute;
2mod chardata;
3mod dtd;
4mod element;
5mod misc;
6pub mod qname;
7mod reference;
8mod strings;
9mod xmldecl;
10
11use crate::item::Node;
12use crate::parser::combinators::map::map;
13use crate::parser::combinators::opt::opt;
14use crate::parser::combinators::tag::tag;
15use crate::parser::combinators::tuple::tuple4;
16use crate::parser::xml::dtd::doctypedecl;
17use crate::parser::xml::element::element;
18use crate::parser::xml::misc::misc;
19use crate::parser::xml::xmldecl::xmldecl;
20use crate::parser::{
21    ParseError, ParseInput, ParserState, ParserStateBuilder, StaticState, StaticStateBuilder,
22};
23use crate::xdmerror::{Error, ErrorKind};
24use crate::xmldecl::XMLDecl;
25use qualname::{NamespaceMap, NamespacePrefix, NamespaceUri};
26
27pub fn parse_with_state<N: Node, L>(
28    input: &str,
29    ps: ParserState<N>,
30    mut ss: StaticState<L>,
31) -> Result<N, Error>
32where
33    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
34{
35    match document((input, ps), &mut ss) {
36        Ok(((_, _), xmldoc)) => Ok(xmldoc),
37        Err(err) => {
38            match err {
39                ParseError::Combinator(f) => Err(Error::new(
40                    ErrorKind::ParseError,
41                    format!(
42                        "Unrecoverable parser error ({}) while parsing XML \"{}\"",
43                        f,
44                        input.chars().take(80).collect::<String>()
45                    ),
46                )),
47                /*
48                ParseError::InvalidChar { row, col } => {
49                    Result::Err(Error {
50                        kind: ErrorKind::ParseError,
51                        message: "Invalid character in document.".to_string(),
52                    })
53                }
54                 */
55                ParseError::MissingGenEntity { .. } => Err(Error::new(
56                    ErrorKind::ParseError,
57                    "Missing Gen Entity.".to_string(),
58                )),
59                ParseError::MissingParamEntity { .. } => Err(Error::new(
60                    ErrorKind::ParseError,
61                    "Missing Param Entity.".to_string(),
62                )),
63                ParseError::EntityDepth { .. } => Err(Error::new(
64                    ErrorKind::ParseError,
65                    "Entity depth limit exceeded".to_string(),
66                )),
67                ParseError::Validation { .. } => Err(Error::new(
68                    ErrorKind::ParseError,
69                    "Validation error.".to_string(),
70                )),
71                ParseError::MissingNameSpace => Err(Error::new(
72                    ErrorKind::ParseError,
73                    "Missing namespace declaration.".to_string(),
74                )),
75                ParseError::NotWellFormed(s) => Err(Error::new(
76                    ErrorKind::ParseError,
77                    format!("XML document not well formed at \"{}\".", s),
78                )),
79                ParseError::ExtDTDLoadError => Err(Error::new(
80                    ErrorKind::ParseError,
81                    "Unable to open external DTD.".to_string(),
82                )),
83                ParseError::Notimplemented => Err(Error::new(
84                    ErrorKind::ParseError,
85                    "Unimplemented feature.".to_string(),
86                )),
87                _ => Err(Error::new(ErrorKind::Unknown, "Unknown error.".to_string())),
88            }
89        }
90    }
91}
92
93// TODO: remove Option<L> argument
94pub fn parse<L, N: Node>(doc: N, input: &str, r: Option<L>) -> Result<N, Error>
95where
96    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
97{
98    let (xmldoc, _) = parse_with_ns(doc, input, r)?;
99    Ok(xmldoc)
100}
101
102// TODO: Review need for this function.
103// Is returning a NamespaceMap really necessary?
104pub fn parse_with_ns<L, N: Node>(
105    doc: N,
106    input: &str,
107    r: Option<L>,
108) -> Result<(N, Option<NamespaceMap>), Error>
109where
110    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
111{
112    let state = ParserStateBuilder::new().doc(doc).build();
113    let static_state = r.map_or(StaticState::new(), |f| {
114        StaticStateBuilder::new().namespace(f).build()
115    });
116    Ok((parse_with_state(input, state, static_state)?, None))
117}
118
119fn document<'a, N: Node, L>(
120    input: ParseInput<'a, N>,
121    ss: &mut StaticState<L>,
122) -> Result<(ParseInput<'a, N>, N), ParseError>
123where
124    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
125{
126    match tuple4(
127        opt(utf8bom()),
128        opt(prolog()),
129        element(),
130        opt(misc()),
131        "document",
132    )(input, ss)
133    {
134        Err(err) => Err(err),
135        Ok(((input1, state1), (_, p, e, m))) => {
136            //Check nothing remaining in iterator, nothing after the end of the root node.
137            if input1.is_empty() {
138                /*
139                   We were checking XML IDRefs as we parsed, but sometimes an ID comes after the IDREF,
140                   we now check those cases to ensure that all IDs needed were reported.
141                */
142                if state1.id_tracking {
143                    for idref in ss.ids_pending.iter() {
144                        if ss.ids_read.get(idref).is_none() {
145                            return Err(ParseError::IDError(String::from("ID missing")));
146                        }
147                    }
148                }
149
150                let pr = p.unwrap_or((None, vec![]));
151
152                let mut d = state1.doc.clone().unwrap();
153
154                pr.1.iter()
155                    .for_each(|n| d.push(n.clone()).expect("unable to add node"));
156                d.push(e).expect("unable to add node");
157                m.unwrap_or_default()
158                    .iter()
159                    .for_each(|n| d.push(n.clone()).expect("unable to add node"));
160                if let Some(x) = pr.0 {
161                    let _ = d.set_xmldecl(x);
162                }
163
164                if !state1.dtd.patterns.is_empty() {
165                    let _ = d.set_dtd((*state1.dtd).clone());
166                };
167
168                Ok((
169                    (input1, state1.clone()),
170                    state1.doc.clone().unwrap().clone(),
171                ))
172            } else {
173                Err(ParseError::NotWellFormed(format!(
174                    "unexpected extra characters: \"{}\"",
175                    input1
176                )))
177            }
178        }
179    }
180}
181
182// prolog ::= XMLDecl misc* (doctypedecl Misc*)?
183fn prolog<'a, N: Node, L>() -> impl Fn(
184    ParseInput<'a, N>,
185    &mut StaticState<L>,
186) -> Result<
187    (ParseInput<'a, N>, (Option<XMLDecl>, Vec<N>)),
188    ParseError,
189>
190where
191    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
192{
193    map(
194        tuple4(opt(xmldecl()), misc(), opt(doctypedecl()), misc(), "prolog"),
195        |(xmld, mut m1, _dtd, mut m2)| {
196            m1.append(&mut m2);
197            (xmld, m1)
198        },
199    )
200}
201
202fn utf8bom<'a, N: Node, L>()
203-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
204where
205    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
206{
207    tag("\u{feff}")
208}