nom_xml/
attribute.rs

1use crate::{
2    namespaces::ParseNamespace,
3    parse::Parse,
4    prolog::subset::entity::{entity_value::EntityValue, EntitySource},
5    reference::{ParseReference, Reference},
6    IResult, Name,
7};
8use nom::{
9    branch::alt,
10    bytes::complete::{tag, take_till1},
11    character::complete::char,
12    combinator::{map, map_res, opt, value},
13    multi::{many0, separated_list1},
14    sequence::{delimited, pair, tuple},
15};
16use std::{cell::RefCell, collections::HashMap, rc::Rc};
17
/// Prefix recorded by an [`Attribute::Namespace`] entry.
#[derive(Clone, Eq, PartialEq)]
pub enum Prefix {
    /// Used by `Attribute::parse_attribute` when the attribute name's prefix is `xmlns`.
    Default,
    /// Any other prefix, stored verbatim.
    Prefix(String),
}
23#[derive(Clone, PartialEq, Eq)]
24pub enum AttributeValue {
25    Value(String),
26    Values(Vec<AttributeValue>),
27    Reference(Reference),
28    EmptyExternalReference,
29}
30
31#[derive(Clone, PartialEq, Eq)]
32pub enum Attribute {
33    Definition {
34        name: Name,
35        att_type: AttType,
36        default_decl: DefaultDecl,
37        source: EntitySource,
38    },
39    Reference(Reference),
40    Instance {
41        name: Name,
42        value: AttributeValue,
43    },
44    Required,
45    Implied,
46    Namespace {
47        prefix: Prefix,
48        uri: AttributeValue,
49    },
50}
51
52impl<'a> Parse<'a> for Attribute {
53    type Args = (
54        Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
55        EntitySource,
56    );
57    type Output = IResult<&'a str, Self>;
58
59    // [41] Attribute ::= Name Eq AttValue
60    fn parse(input: &'a str, args: Self::Args) -> Self::Output {
61        let (entity_references, entity_source) = args;
62        {
63            map(
64                tuple((Self::parse_name, Self::parse_eq, move |i| {
65                    Self::parse_attvalue(i, entity_references.clone(), entity_source.clone())
66                })),
67                |(name, _eq, value)| Attribute::Instance { name, value },
68            )(input)
69        }
70    }
71}
72
73impl<'a> ParseNamespace<'a> for Attribute {}
74impl Attribute {
75    /// Useful for creating an attribute to match against in parsing specific tags.
76    ///
77    /// See the ['parse_element_with_specific_attribute_value'](../../parse_element_with_specific_attribute_value/index.html) example for more information.
78    ///
79    /// Future use in writing XML files.
80    ///
81    /// Note: this will not create attribute names that have prefixes
82    ///
83    /// ```rust
84    /// use nom_xml::attribute::Attribute;
85    /// let attr = Attribute::new("name","value");
86    /// ```
87    ///
88    pub fn new(name: &str, value: &str) -> Self {
89        Attribute::Instance {
90            name: Name::new(None, name),
91            value: AttributeValue::Value(value.into()),
92        }
93    }
94
95    // [53] AttDef ::= S Name S AttType S DefaultDecl
96    pub fn parse_definition(
97        input: &str,
98        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
99        entity_source: EntitySource,
100    ) -> IResult<&str, Attribute> {
101        map(
102            tuple((
103                Self::parse_multispace1,
104                Self::parse_name,
105                Self::parse_multispace1,
106                |i| AttType::parse(i, ()),
107                Self::parse_multispace1,
108                |i| DefaultDecl::parse(i, (entity_references.clone(), entity_source.clone())),
109            )),
110            |(_whitespace1, name, _whitespace2, att_type, _whitespace3, default_decl)| {
111                Attribute::Definition {
112                    name,
113                    att_type,
114                    default_decl,
115                    source: entity_source.clone(),
116                }
117            },
118        )(input)
119    }
120
121    // Namespaces (Third Edition) [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
122    pub fn parse_qualified_definition(
123        input: &str,
124        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
125        entity_source: EntitySource,
126    ) -> IResult<&str, Attribute> {
127        map(
128            tuple((
129                Self::parse_multispace1,
130                alt((
131                    Self::parse_qualified_name,
132                    Self::parse_namespace_attribute_name,
133                )),
134                Self::parse_multispace1,
135                |i| AttType::parse(i, ()),
136                Self::parse_multispace1,
137                |i| DefaultDecl::parse(i, (entity_references.clone(), entity_source.clone())),
138            )),
139            |(_whitespace1, name, _whtiespace2, att_type, _whtiespace3, default_decl)| {
140                Attribute::Definition {
141                    name,
142                    att_type,
143                    default_decl,
144                    source: entity_source.clone(),
145                }
146            },
147        )(input)
148    }
149
150    // [10] AttValue ::= '"' ([^<&"] | Reference)* '"'|  "'" ([^<&'] | Reference)* "'"
151    pub fn parse_attvalue(
152        input: &str,
153        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
154        entity_source: EntitySource,
155    ) -> IResult<&str, AttributeValue> {
156        match entity_source {
157            EntitySource::Internal | EntitySource::None => {
158                map(
159                    alt((
160                        delimited(
161                            tag("\""),
162                            many0(alt((
163                                map(
164                                    take_till1(|c| c == '<' || c == '&' || c == '\"'),
165                                    |s: &str| AttributeValue::Value(s.into()),
166                                ),
167                                map(
168                                    |i| Reference::parse(i, entity_source.clone()),
169                                    |reference| {
170                                        reference.normalize_attribute(
171                                            entity_references.clone(),
172                                            entity_source.clone(),
173                                        )
174                                    },
175                                ),
176                            ))),
177                            tag("\""),
178                        ),
179                        delimited(
180                            tag("'"),
181                            many0(alt((
182                                map(
183                                    take_till1(|c| c == '<' || c == '&' || c == '\''),
184                                    |s: &str| AttributeValue::Value(s.into()),
185                                ),
186                                map(
187                                    |i| Reference::parse(i, entity_source.clone()),
188                                    |reference| {
189                                        reference.normalize_attribute(
190                                            entity_references.clone(),
191                                            entity_source.clone(),
192                                        )
193                                    },
194                                ),
195                            ))),
196                            tag("'"),
197                        ),
198                    )),
199                    |contents: Vec<AttributeValue>| {
200                        let mut buffer = String::new();
201                        for content in contents {
202                            if let AttributeValue::Value(mut value) = content {
203                                // End-of-Line Handling for each value
204                                let mut chars: Vec<char> = value.chars().collect();
205                                let mut i = 0;
206                                while i < chars.len() {
207                                    if chars[i] == '\r' {
208                                        if i + 1 < chars.len() && chars[i + 1] == '\n' {
209                                            chars.remove(i);
210                                        } else {
211                                            chars[i] = '\n';
212                                        }
213                                    }
214                                    i += 1;
215                                }
216                                value = chars.into_iter().collect();
217                                buffer.push_str(&value);
218                            }
219                        }
220
221                        AttributeValue::Value(buffer)
222                    },
223                )(input)
224            }
225
226            EntitySource::External => {
227                map(
228                    many0(alt((
229                        map(
230                            |i| {
231                                tuple((
232                                    |input| Reference::parse_parameter_reference(input),
233                                    Self::parse_multispace0,
234                                ))(i)
235                            },
236                            |(reference, _whitespace)| {
237                                reference.normalize_attribute(
238                                    entity_references.clone(),
239                                    entity_source.clone(),
240                                )
241                            },
242                        ),
243                        delimited(
244                            tag("\""),
245                            map(
246                                many0(alt((
247                                    map(
248                                        take_till1(|c| c == '<' || c == '&' || c == '\"'),
249                                        |s: &str| AttributeValue::Value(s.into()),
250                                    ),
251                                    map(
252                                        |i| Reference::parse(i, entity_source.clone()),
253                                        |reference| {
254                                            reference.normalize_attribute(
255                                                entity_references.clone(),
256                                                entity_source.clone(),
257                                            )
258                                        },
259                                    ),
260                                ))),
261                                |values| {
262                                    let mut buffer = String::new();
263                                    for value in values {
264                                        if let AttributeValue::Value(v) = value {
265                                            buffer.push_str(&v);
266                                        }
267                                    }
268                                    AttributeValue::Value(buffer)
269                                },
270                            ),
271                            tag("\""),
272                        ),
273                        delimited(
274                            tag("'"),
275                            map(
276                                many0(alt((
277                                    map(
278                                        take_till1(|c| c == '<' || c == '&' || c == '\''),
279                                        |s: &str| AttributeValue::Value(s.into()),
280                                    ),
281                                    map(
282                                        |i| Reference::parse(i, entity_source.clone()),
283                                        |reference| {
284                                            reference.normalize_attribute(
285                                                entity_references.clone(),
286                                                entity_source.clone(),
287                                            )
288                                        },
289                                    ),
290                                ))),
291                                |values| {
292                                    let mut buffer = String::new();
293                                    for value in values {
294                                        if let AttributeValue::Value(v) = value {
295                                            buffer.push_str(&v);
296                                        }
297                                    }
298                                    AttributeValue::Value(buffer)
299                                },
300                            ),
301                            tag("'"),
302                        ),
303                    ))),
304                    |contents: Vec<AttributeValue>| {
305                        let mut buffer = String::new();
306                        for content in contents {
307                            if let AttributeValue::Value(mut value) = content {
308                                // End-of-Line Handling for each value
309                                let mut chars: Vec<char> = value.chars().collect();
310                                let mut i = 0;
311                                while i < chars.len() {
312                                    if chars[i] == '\r' {
313                                        if i + 1 < chars.len() && chars[i + 1] == '\n' {
314                                            chars.remove(i);
315                                        } else {
316                                            chars[i] = '\n';
317                                        }
318                                    }
319                                    i += 1;
320                                }
321                                value = chars.into_iter().collect();
322                                buffer.push_str(&value);
323                            }
324                        }
325
326                        AttributeValue::Value(buffer)
327                    },
328                )(input)
329            }
330        }
331    }
332
333    // Namespaces (Third Edition) [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
334    pub fn parse_attribute(
335        input: &str,
336        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
337        entity_source: EntitySource,
338    ) -> IResult<&str, Attribute> {
339        map(
340            alt((
341                tuple((Self::parse_namespace_attribute_name, Self::parse_eq, |i| {
342                    Attribute::parse_attvalue(i, entity_references.clone(), entity_source.clone())
343                })),
344                tuple((Self::parse_qualified_name, Self::parse_eq, |i| {
345                    Self::parse_attvalue(i, entity_references.clone(), entity_source.clone())
346                })),
347            )),
348            |result| match result {
349                (name, _eq, value) if name.prefix.is_some() => {
350                    let prefix = name.prefix.unwrap();
351
352                    if &prefix == "xmlns" {
353                        Attribute::Namespace {
354                            prefix: Prefix::Default,
355                            uri: value,
356                        }
357                    } else {
358                        Attribute::Namespace {
359                            prefix: Prefix::Prefix(prefix),
360                            uri: value,
361                        }
362                    }
363                }
364                (Name { prefix, local_part }, _eq, value) => Attribute::Instance {
365                    name: Name { prefix, local_part },
366                    value,
367                },
368            },
369        )(input)
370    }
371}
372
/// Keyword of a tokenized attribute type (production [56] TokenizedType).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenizedType {
    /// The `ID` keyword.
    ID,
    /// The `IDREF` keyword.
    IDREF,
    /// The `IDREFS` keyword.
    IDREFS,
    /// The `ENTITY` keyword.
    ENTITY,
    /// The `ENTITIES` keyword.
    ENTITIES,
    /// The `NMTOKEN` keyword.
    NMTOKEN,
    /// The `NMTOKENS` keyword.
    NMTOKENS,
}
383
384impl TokenizedType {
385    // [56] TokenizedType ::= 'ID' | 'IDRef' | 'IDREFS | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
386    fn parse(input: &str) -> IResult<&str, TokenizedType> {
387        alt((
388            value(TokenizedType::IDREFS, tag("IDREFS")),
389            value(TokenizedType::IDREF, tag("IDREF")),
390            value(TokenizedType::ID, tag("ID")),
391            value(TokenizedType::ENTITY, tag("ENTITY")),
392            value(TokenizedType::ENTITIES, tag("ENTITIES")),
393            value(TokenizedType::NMTOKENS, tag("NMTOKENS")),
394            value(TokenizedType::NMTOKEN, tag("NMTOKEN")),
395        ))(input)
396    }
397}
398
399#[derive(Clone, Debug, PartialEq, Eq)]
400pub enum AttType {
401    CDATA,
402    Tokenized(TokenizedType),
403    Enumerated {
404        notation: Option<Vec<Name>>,
405        enumeration: Option<Vec<String>>,
406    },
407}
408
409impl<'a> Parse<'a> for AttType {
410    type Args = ();
411    type Output = IResult<&'a str, Self>;
412    //[54] AttType ::=  StringType | TokenizedType | EnumeratedType
413    fn parse(input: &'a str, _args: Self::Args) -> Self::Output {
414        let (input, att_type) = map(
415            alt((
416                // [55] StringType ::= 'CDATA'
417                value(AttType::CDATA, tag("CDATA")),
418                // [56] TokenizedType ::= 'ID'| 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
419                map(TokenizedType::parse, AttType::Tokenized),
420                Self::parse_enumerated_type,
421            )),
422            |parsed_att_type| parsed_att_type,
423        )(input)?;
424
425        Ok((input, att_type))
426    }
427}
428impl AttType {
429    // [57] EnumeratedType ::= NotationType | Enumeration
430    fn parse_enumerated_type(input: &str) -> IResult<&str, AttType> {
431        alt((Self::parse_notation_type, Self::parse_enumeration))(input)
432    }
433
434    // [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
435    fn parse_notation_type(input: &str) -> IResult<&str, AttType> {
436        map(
437            tuple((
438                tag("NOTATION"),
439                Self::parse_multispace1,
440                delimited(
441                    char('('),
442                    delimited(
443                        Self::parse_multispace0,
444                        separated_list1(
445                            delimited(Self::parse_multispace0, char('|'), Self::parse_multispace0),
446                            Self::parse_name,
447                        ),
448                        Self::parse_multispace0,
449                    ),
450                    char(')'),
451                ),
452            )),
453            |(_notation_literal, _whitespace, names)| AttType::Enumerated {
454                notation: Some(names),
455                enumeration: None,
456            },
457        )(input)
458    }
459
460    // [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
461    fn parse_enumeration(input: &str) -> IResult<&str, AttType> {
462        map(
463            delimited(
464                char('('),
465                separated_list1(
466                    tuple((Self::parse_multispace0, char('|'), Self::parse_multispace0)),
467                    Self::parse_nmtoken,
468                ),
469                char(')'),
470            ),
471            |enumeration| AttType::Enumerated {
472                notation: None,
473                enumeration: Some(enumeration),
474            },
475        )(input)
476    }
477}
478
/// Default declaration of an attribute definition (production [60] DefaultDecl).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DefaultDecl {
    /// `#REQUIRED`.
    Required,
    /// `#IMPLIED`.
    Implied,
    /// `#FIXED` followed by an attribute value; holds that value's text.
    Fixed(String),
    /// A bare attribute value; holds that value's text.
    Value(String),
}
486
487impl<'a> Parse<'a> for DefaultDecl {
488    type Args = (
489        Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
490        EntitySource,
491    );
492    type Output = IResult<&'a str, Self>;
493    // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
494    fn parse(input: &'a str, args: Self::Args) -> Self::Output {
495        let (entity_references, entity_source) = args;
496        let cloned_entity_references = entity_references.clone();
497        let cloned_entity_source = entity_source.clone();
498        alt((
499            value(DefaultDecl::Required, tag("#REQUIRED")),
500            value(DefaultDecl::Implied, tag("#IMPLIED")),
501            map_res(
502                pair(
503                    opt(tuple((tag("#FIXED"), Self::parse_multispace1))),
504                    move |i| {
505                        Attribute::parse_attvalue(
506                            i,
507                            cloned_entity_references.clone(),
508                            cloned_entity_source.clone(),
509                        )
510                    },
511                ),
512                |(fixed, attvalue)| {
513                    if let AttributeValue::Value(value) = attvalue {
514                        match fixed {
515                            Some(_) => Ok(DefaultDecl::Fixed(value)),
516                            None => Ok(DefaultDecl::Value(value)),
517                        }
518                    } else {
519                        Err(nom::Err::Failure(nom::error::Error::new(
520                            format!("Failed to parse attvalue: {attvalue:?}"), // input,
521                            nom::error::ErrorKind::Fail,
522                        )))
523                    }
524                },
525            ),
526        ))(input)
527    }
528}