nom_xml/
tag.rs

1use crate::{
2    attribute::{Attribute, AttributeValue, DefaultDecl},
3    error::Error,
4    namespaces::ParseNamespace,
5    parse::Parse,
6    prolog::subset::entity::{entity_value::EntityValue, EntitySource},
7    IResult, Name,
8};
9use nom::{
10    branch::alt,
11    bytes::complete::tag,
12    combinator::{map, map_res, opt},
13    multi::{many0, many1},
14    sequence::{delimited, pair, tuple},
15};
16
17use std::{cell::RefCell, collections::HashMap, rc::Rc};
18
/// Kind of markup tag a `Tag` value represents.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TagState {
    /// Set by the start-tag parsers (`<name ...>`).
    Start,
    /// Set by the end-tag parsers (`</name>`).
    End,
    /// Set by the empty-element-tag parsers (`<name ... />`).
    Empty,
}
26
27#[derive(Clone, PartialEq, Eq)]
28pub struct Tag {
29    pub name: Name,
30    pub attributes: Option<Vec<Attribute>>, // Attribute::Instance
31    pub state: TagState,
32}
33
34impl<'a> Parse<'a> for Tag {
35    type Args = ();
36    type Output = IResult<&'a str, Self>;
37}
38impl<'a> ParseNamespace<'a> for Tag {}
39
// TODO: Investigate. The hardcoded bracket character references (`&#60;`, `&#x3C;`, `&#62;`, `&#x3E;`) are a hack to get reference element parsing to work. It is unclear how this affects the handling of invalid XML.
// An attempt to use decode instead ran into lifetime issues.
42impl Tag {
43    pub fn new(name: Name, attributes: Option<Vec<Attribute>>, state: TagState) -> Self {
44        Self {
45            name,
46            attributes,
47            state,
48        }
49    }
50
51    // [40] STag ::= '<' Name (S Attribute)* S? '>'
52    // Namespaces (Third Edition) [12] STag ::= '<' QName (S Attribute)* S? '>'
53    pub fn parse_start_tag(
54        input: &str,
55        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
56        entity_source: EntitySource,
57    ) -> IResult<&str, Self> {
58        map(
59            tuple((
60                alt((tag("&#60;"), tag("&#x3C;"), tag("<"))),
61                alt((Self::parse_qualified_name, Self::parse_name)),
62                many0(pair(Self::parse_multispace1, |i| {
63                    Attribute::parse_attribute(i, entity_references.clone(), entity_source.clone())
64                })),
65                Self::parse_multispace0,
66                alt((tag("&#62;"), tag("&#x3E;"), tag(">"))),
67            )),
68            |(_open_char, name, attributes, _whitespace, _close_char)| {
69                let attributes: Vec<_> = attributes
70                    .into_iter()
71                    .map(|(_whitespace, attr)| attr)
72                    .collect();
73                Self {
74                    name,
75                    attributes: if attributes.is_empty() {
76                        // check doctype here, if within that, add them to the tag else, None
77                        None
78                    } else {
79                        Some(attributes)
80                    },
81                    state: TagState::Start,
82                }
83            },
84        )(input)
85    }
86
87    pub fn parse_start_tag_by_name<'a>(
88        input: &'a str,
89        tag_name: &'a str,
90        attributes: &Option<Vec<Attribute>>,
91        entity_references: &Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
92        entity_source: EntitySource,
93    ) -> IResult<&'a str, Self> {
94        let mut current_input = input;
95
96        loop {
97            let result: IResult<&'a str, Self> = map_res(
98                tuple((
99                    alt((tag("&#60;"), tag("&#x3C;"), tag("<"))),
100                    map_res(
101                        alt((Self::parse_qualified_name, Self::parse_name)),
102                        |name| {
103                            if name.local_part == tag_name {
104                                Ok(name)
105                            } else {
106                                Err(nom::Err::Error(nom::error::Error::new(
107                                    "Start Tag doesn't match",
108                                    nom::error::ErrorKind::Tag,
109                                )))
110                            }
111                        },
112                    ),
113                    many0(pair(Self::parse_multispace1, |i| {
114                        Attribute::parse_attribute(i, entity_references.clone(), entity_source.clone())
115                    })),
116                    Self::parse_multispace0,
117                    alt((tag("&#62;"), tag("&#x3E;"), tag(">"))),
118                )),
119                |(_open_char, name, attributes_vec, _whitespace, _close_char)| -> Result<Self, nom::Err<Error>> {
120                    let parsed_attributes: Vec<_> = attributes_vec
121                        .into_iter()
122                        .map(|(_whitespace, attr)| attr)
123                        .collect();
124                    if let Some(expected_attributes) = attributes {
125                        if expected_attributes == &parsed_attributes {
126                            Ok(Self {
127                                name,
128                                attributes: if parsed_attributes.is_empty() {
129                                    None
130                                } else {
131                                    Some(parsed_attributes)
132                                },
133                                state: TagState::Start,
134                            })
135                        } else {
136                            Err(nom::Err::Error(nom::error::Error::new(
137                                "Attributes do not match",
138                                nom::error::ErrorKind::Tag,
139                            ).into()))
140                        }
141                    } else {
142                        Ok(Self {
143                            name,
144                            attributes: if parsed_attributes.is_empty() {
145                                None
146                            } else {
147                                Some(parsed_attributes)
148                            },
149                            state: TagState::Start,
150                        })
151                    }
152                },
153            )(current_input);
154
155            match result {
156                Ok((next_input, tag)) => return Ok((next_input, tag)),
157                Err(nom::Err::Error(_)) => {
158                    if current_input.is_empty() {
159                        return Err(nom::Err::Error(
160                            nom::error::Error::new(current_input, nom::error::ErrorKind::Tag)
161                                .into(),
162                        ));
163                    }
164                    // Move forward in the input string to avoid infinite loop
165                    current_input = &current_input[1..];
166                }
167                Err(e) => return Err(e),
168            }
169        }
170    }
171    // [42] ETag ::= '</' Name S? '>'
172    // Namespaces (Third Edition) [13] ETag ::= '</' QName S? '>'
173    pub fn parse_end_tag(input: &str) -> IResult<&str, Self> {
174        delimited(
175            alt((tag("&#60;/"), tag("&#x3C;/"), tag("</"))),
176            map(
177                tuple((
178                    Self::parse_multispace0,
179                    alt((Self::parse_qualified_name, Self::parse_name)),
180                    Self::parse_multispace0,
181                )),
182                |(_open_tag, name, _close_tag)| Self {
183                    name,
184                    attributes: None, // Attributes are not parsed for end tags
185                    state: TagState::End,
186                },
187            ),
188            alt((tag("&#62;"), tag("&#x3E;"), tag(">"))),
189        )(input)
190    }
191    // [42] ETag ::= '</' Name S? '>'
192    // Namespaces (Third Edition) [13] ETag ::= '</' QName S? '>'
193    pub fn parse_end_tag_by_name<'a>(input: &'a str, tag_name: &'a str) -> IResult<&'a str, Self> {
194        delimited(
195            alt((tag("&#60;/"), tag("&#x3C;/"), tag("</"))),
196            map(
197                tuple((
198                    Self::parse_multispace0,
199                    map_res(
200                        alt((Self::parse_qualified_name, Self::parse_name)),
201                        |name| {
202                            if name.local_part == tag_name {
203                                Ok(name)
204                            } else {
205                                Err(nom::Err::Error(nom::error::Error::new(
206                                    "END TAG FAILING",
207                                    nom::error::ErrorKind::Tag,
208                                )))
209                            }
210                        },
211                    ),
212                    Self::parse_multispace0,
213                )),
214                |(_open_tag, name, _close_tag)| Self {
215                    name,             //: Name::new(None, name),
216                    attributes: None, // Attributes are not parsed for end tags
217                    state: TagState::End,
218                },
219            ),
220            alt((tag("&#62;"), tag("&#x3E;"), tag(">"))),
221        )(input)
222    }
223
224    // [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
225    // Namespaces (Third Edition) [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
226    pub fn parse_empty_element_tag(
227        input: &str,
228        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
229        entity_source: EntitySource,
230    ) -> IResult<&str, Self> {
231        map(
232            tuple((
233                alt((tag("&#60;"), tag("&#x3C;"), tag("<"))),
234                alt((Self::parse_qualified_name, Self::parse_name)),
235                opt(many1(pair(Self::parse_multispace1, |i| {
236                    Attribute::parse(i, (entity_references.clone(), entity_source.clone()))
237                }))),
238                Self::parse_multispace0,
239                alt((tag("/&#62;"), tag("/&#x3E;"), tag("/>"))),
240            )),
241            |(_open_tag, name, attributes, _whitespace, _close_tag)| Self {
242                name,
243                attributes: attributes
244                    .map(|attr| attr.into_iter().map(|(_whitespace, attr)| attr).collect()),
245                state: TagState::Empty,
246            },
247        )(input)
248    }
249
250    // [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
251    // Namespaces (Third Edition) [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
252    pub fn parse_empty_element_tag_by_name<'a>(
253        input: &'a str,
254        tag_name: &'a str,
255        _attributes: &Option<Vec<Attribute>>, //TODO: implement empty tag attribute matching
256        entity_references: &Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
257        entity_source: EntitySource,
258    ) -> IResult<&'a str, Self> {
259        map(
260            tuple((
261                alt((tag("&#60;"), tag("&#x3C;"), tag("<"))),
262                tag(tag_name),
263                opt(many1(pair(Self::parse_multispace1, |i| {
264                    Attribute::parse(i, (entity_references.clone(), entity_source.clone()))
265                }))),
266                Self::parse_multispace0,
267                alt((tag("/&#62;"), tag("/&#x3E;"), tag("/>"))),
268            )),
269            |(_open_tag, name, attributes, _whitespace, _close_tag)| Self {
270                name: Name::new(None, name),
271                attributes: attributes
272                    .map(|attr| attr.into_iter().map(|(_whitespace, attr)| attr).collect()),
273                state: TagState::Empty,
274            },
275        )(input)
276    }
277    pub fn merge_default_attributes(&mut self, default_attributes: &[Attribute]) {
278        let existing_attributes = self.attributes.get_or_insert_with(Vec::new);
279
280        let mut seen_names = std::collections::HashSet::new();
281        for default_attr in default_attributes {
282            if let Attribute::Definition {
283                name, default_decl, ..
284            } = default_attr
285            {
286                if seen_names.contains(name) {
287                    // Skip if this name has already been processed.
288                    continue;
289                }
290                seen_names.insert(name.clone());
291
292                // Only add the attribute if it doesn't already exist and has a default value
293                let exists = existing_attributes.iter().any(|attr| matches!(attr, Attribute::Instance { name: existing_name, .. } if existing_name == name));
294                if !exists {
295                    if let DefaultDecl::Value(val) = default_decl {
296                        existing_attributes.push(Attribute::Instance {
297                            name: name.clone(),
298                            value: AttributeValue::Value(val.clone()),
299                        });
300                    }
301                }
302            }
303        }
304
305        // If no attributes were added (and none were already present), set attributes to None
306        if existing_attributes.is_empty() {
307            self.attributes = None;
308        }
309    }
310
311    pub fn add_attributes(&mut self, new_attributes: Vec<Attribute>) {
312        self.attributes = if new_attributes.is_empty() {
313            None
314        } else {
315            Some(new_attributes)
316        };
317    }
318}