nom_xml/prolog/subset/
markup_declaration.rs

1use std::{cell::RefCell, collections::HashMap, rc::Rc};
2
3use nom::{
4    branch::alt,
5    bytes::complete::{is_not, tag},
6    character::complete::char,
7    combinator::{map, map_res, opt},
8    multi::{fold_many1, many0, many1},
9    sequence::tuple,
10};
11
12use crate::{
13    attribute::Attribute,
14    error,
15    namespaces::ParseNamespace,
16    parse::Parse,
17    processing_instruction::ProcessingInstruction,
18    prolog::{declaration_content::DeclarationContent, external_id::ExternalID, id::ID},
19    reference::Reference,
20    Document, IResult, Name,
21};
22
23use super::entity::{
24    entity_declaration::{EntityDecl, GeneralEntityDeclaration, ParameterEntityDeclaration},
25    entity_definition::EntityDefinition,
26    entity_value::EntityValue,
27    EntitySource,
28};
29
30#[derive(Clone, PartialEq, Eq)]
31pub enum MarkupDeclaration {
32    Element {
33        name: Name,
34        content_spec: Option<DeclarationContent>,
35    },
36    AttList {
37        name: Name,
38        att_defs: Option<Vec<Attribute>>,
39    },
40    Entity(EntityDecl),
41    Notation {
42        name: Name,
43        id: ID,
44    },
45    ProcessingInstruction(ProcessingInstruction),
46    Comment(Document),
47}
48impl<'a> ParseNamespace<'a> for MarkupDeclaration {}
49
50impl<'a> Parse<'a> for MarkupDeclaration {
51    type Args = (
52        Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
53        EntitySource,
54    );
55    type Output = IResult<&'a str, Option<MarkupDeclaration>>;
56    // [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
57    fn parse(input: &'a str, args: Self::Args) -> Self::Output {
58        let (entity_references, entity_source) = args;
59
60        let (input, res) = opt(alt((
61            Self::parse_element_declaration,
62            |i| {
63                Self::parse_attlist_declaration(i, entity_references.clone(), entity_source.clone())
64            },
65            |i| Self::parse_entity(i, entity_references.clone(), entity_source.clone()),
66            Self::parse_notation,
67            Self::parse_processing_instruction,
68            Self::parse_comment,
69        )))(input)?;
70        Ok((input, res))
71    }
72}
73
74impl MarkupDeclaration {
75    // [45] elementdecl	::= '<!ELEMENT' S Name S contentspec S? '>'
76    // Namespaces (Third Edition) [17] elementdecl	::= '<!ELEMENT' S QName S contentspec S? '>'
77    fn parse_element_declaration(input: &str) -> IResult<&str, MarkupDeclaration> {
78        let (
79            input,
80            (_element, _whitespace1, name, _whitespace2, content_spec, _whitespace, _close),
81        ) = tuple((
82            tag("<!ELEMENT"),
83            Self::parse_multispace1,
84            alt((Self::parse_name, Self::parse_qualified_name)),
85            Self::parse_multispace1,
86            |i| DeclarationContent::parse(i, ()),
87            Self::parse_multispace0,
88            tag(">"),
89        ))(input)?;
90
91        Ok((
92            input,
93            MarkupDeclaration::Element {
94                name,
95                content_spec: Some(content_spec),
96            },
97        ))
98    }
99
100    // [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'	[VC: Unique Notation Name]
101    fn parse_notation(input: &str) -> IResult<&str, MarkupDeclaration> {
102        let (input, (_notation, _whitespace1, name, _whitespace2, id, _whitespace3, _close)) =
103            tuple((
104                tag("<!NOTATION"),
105                Self::parse_multispace1,
106                alt((Self::parse_name, Self::parse_qualified_name)),
107                Self::parse_multispace1,
108                |i| ID::parse(i, ()),
109                Self::parse_multispace0,
110                tag(">"),
111            ))(input)?;
112
113        Ok((input, MarkupDeclaration::Notation { name, id }))
114    }
115
116    fn parse_processing_instruction(input: &str) -> IResult<&str, MarkupDeclaration> {
117        let (input, processing_instruction) = ProcessingInstruction::parse(input, ())?;
118        Ok((
119            input,
120            MarkupDeclaration::ProcessingInstruction(processing_instruction),
121        ))
122    }
123    // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
124    // Namespaces (Third Edition) [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'
125    pub fn parse_attlist_declaration(
126        input: &str,
127        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
128        entity_source: EntitySource,
129    ) -> IResult<&str, MarkupDeclaration> {
130        let (input, (_start, _whitespace1, name, att_defs, _whitespace2, _close)) =
131            tuple((
132                tag("<!ATTLIST"),
133                Self::parse_multispace1,
134                alt((Self::parse_name, Self::parse_qualified_name)),
135                many0(|i| {
136                    Attribute::parse_definition(i, entity_references.clone(), entity_source.clone())
137                }),
138                Self::parse_multispace0,
139                tag(">"),
140            ))(input)?;
141        Ok((
142            input,
143            MarkupDeclaration::AttList {
144                name,
145                att_defs: Some(att_defs),
146            },
147        ))
148    }
149
150    // [70] EntityDecl ::= GEDecl | PEDecl
151    fn parse_entity(
152        input: &str,
153        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
154        entity_source: EntitySource,
155    ) -> IResult<&str, MarkupDeclaration> {
156        alt((
157            |i| {
158                Self::parse_general_entity_declaration(
159                    i,
160                    entity_references.clone(),
161                    entity_source.clone(),
162                )
163            },
164            |i| {
165                Self::parse_parameter_entity_declaration(
166                    i,
167                    entity_references.clone(),
168                    entity_source.clone(),
169                )
170            },
171        ))(input)
172    }
173
174    // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
175    fn parse_general_entity_declaration(
176        input: &str,
177        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
178        entity_source: EntitySource,
179    ) -> IResult<&str, MarkupDeclaration> {
180        let (input, (_start, _whitespace1, name, _whitespace2)) = tuple((
181            tag("<!ENTITY"),
182            Self::parse_multispace1,
183            Self::parse_name,
184            Self::parse_multispace1,
185        ))(input)?;
186
187        let (input, (entity_def, _whitespace3, _close)) = tuple((
188            |i| {
189                Self::parse_entity_definition(
190                    i,
191                    name.clone(),
192                    entity_references.clone(),
193                    entity_source.clone(),
194                )
195            },
196            Self::parse_multispace0,
197            tag(">"),
198        ))(input)?;
199        Ok((
200            input,
201            MarkupDeclaration::Entity(EntityDecl::General(GeneralEntityDeclaration {
202                name,
203                entity_def,
204            })),
205        ))
206    }
207
208    // [72]    PEDecl ::=    '<!ENTITY' S '%' S Name S PEDef S? '>'
209    fn parse_parameter_entity_declaration(
210        input: &str,
211        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
212        entity_source: EntitySource,
213    ) -> IResult<&str, MarkupDeclaration> {
214        let (input, (_start, _whitespace1, _percent, _whitespace2, name, _whitespace3)) =
215            tuple((
216                tag("<!ENTITY"),
217                Self::parse_multispace1,
218                tag("%"),
219                Self::parse_multispace1,
220                Self::parse_name,
221                Self::parse_multispace1,
222            ))(input)?;
223
224        let (input, (entity_def, _whitespace4, _close)) = tuple((
225            |i| {
226                Self::parse_parameter_definition(
227                    i,
228                    name.clone(),
229                    entity_references.clone(),
230                    entity_source.clone(),
231                )
232            },
233            Self::parse_multispace0,
234            tag(">"),
235        ))(input)?;
236
237        Ok((
238            input,
239            MarkupDeclaration::Entity(EntityDecl::Parameter(ParameterEntityDeclaration {
240                name,
241                entity_def,
242            })),
243        ))
244    }
245
246    // [74] PEDef ::= EntityValue | ExternalID
247    fn parse_parameter_definition(
248        input: &str,
249        name: Name,
250        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
251        entity_source: EntitySource,
252    ) -> IResult<&str, EntityDefinition> {
253        alt((
254            map(
255                |i| {
256                    Self::parse_entity_value(
257                        i,
258                        name.clone(),
259                        entity_references.clone(),
260                        entity_source.clone(),
261                    )
262                },
263                EntityDefinition::EntityValue,
264            ),
265            map(
266                |i| ExternalID::parse(i, ()),
267                |id| EntityDefinition::External {
268                    id,
269                    n_data: None,
270                    text_decl: None,
271                },
272            ),
273        ))(input)
274    }
275
276    // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
277    fn parse_entity_definition(
278        input: &str,
279        name: Name,
280        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
281        entity_source: EntitySource,
282    ) -> IResult<&str, EntityDefinition> {
283        alt((
284            map(
285                |i| {
286                    Self::parse_entity_value(
287                        i,
288                        name.clone(),
289                        entity_references.clone(),
290                        entity_source.clone(),
291                    )
292                },
293                EntityDefinition::EntityValue,
294            ),
295            map(
296                tuple((
297                    |i| ExternalID::parse(i, ()),
298                    opt(Self::parse_ndata_declaration),
299                )),
300                |(id, n_data)| EntityDefinition::External {
301                    id,
302                    n_data,
303                    text_decl: None,
304                },
305            ),
306        ))(input)
307    }
308
309    // [76] NDataDecl ::= S 'NDATA' S Name
310    fn parse_ndata_declaration(input: &str) -> IResult<&str, Name> {
311        let (input, _) = Self::parse_multispace1(input)?;
312        let (input, _) = tag("NDATA")(input)?;
313        let (input, _) = Self::parse_multispace1(input)?;
314        let (input, name) = Self::parse_name(input)?;
315
316        Ok((input, name))
317    }
318    // [9] EntityValue	::= '"' ([^%&"] | PEReference | Reference)* '"'|  "'" ([^%&'] | PEReference | Reference)* "'"
319    pub fn parse_entity_value(
320        input: &str,
321        name: Name,
322        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
323        entity_source: EntitySource,
324    ) -> IResult<&str, EntityValue> {
325        //TODO: I hate this. Refactor is possible
326        let cloned_references = entity_references.clone();
327        let cloned_references2 = entity_references.clone();
328
329        let cloned_entity_source = entity_source.clone();
330        let cloned_entity_source2 = entity_source.clone();
331        // TODO: removed alt((alt(()))) need to test if functionality is the same
332        alt((
333            map(
334                tuple((
335                    alt((char('\"'), char('\''))),
336                    Self::capture_span(alt((
337                        move |i| Document::parse_element(i, cloned_references.clone()),
338                        Document::parse_cdata_section,
339                    ))),
340                    alt((char('\"'), char('\''))),
341                )),
342                |(_, (raw_entity_value, doc), _)| {
343                    entity_references.borrow_mut().insert(
344                        (name.clone(), EntitySource::Internal),
345                        EntityValue::Document(doc),
346                    );
347                    EntityValue::Value(raw_entity_value.to_string())
348                },
349            ),
350            map_res(
351                tuple((
352                    alt((char('\"'), char('\''))),
353                    Self::capture_span(move |i| {
354                        Self::parse(i, (cloned_references2.clone(), entity_source.clone()))
355                    }),
356                    alt((char('\"'), char('\''))),
357                )),
358                |(_, (raw_internal_subset, data), _)| match data {
359                    Some(data) => {
360                        entity_references.borrow_mut().insert(
361                            (name.clone(), EntitySource::Internal),
362                            EntityValue::MarkupDecl(Box::new(data)),
363                        );
364
365                        Ok(EntityValue::Value(raw_internal_subset.to_string()))
366                    }
367                    None => Err(nom::Err::Failure((
368                        "No Internal Subset",
369                        nom::error::ErrorKind::Fail,
370                    ))),
371                },
372            ),
373            map(
374                tuple((
375                    tag("\""),
376                    opt(many1(alt((
377                        map(
378                            move |i| Reference::parse(i, cloned_entity_source.clone()),
379                            EntityValue::Reference,
380                        ),
381                        map(
382                            fold_many1(
383                                map(is_not("%&\""), |s: &str| s.to_string()),
384                                String::new,
385                                |mut acc: String, item: String| {
386                                    acc.push_str(&item);
387                                    acc
388                                },
389                            ),
390                            EntityValue::Value,
391                        ),
392                    )))),
393                    tag("\""),
394                )),
395                |(_, maybe_entities, _)| {
396                    let mut buffer = String::new();
397                    if let Some(entities) = maybe_entities {
398                        match entities.as_slice() {
399                            [EntityValue::Reference(_)] => return entities[0].clone(),
400                            _ => {
401                                for entity in entities {
402                                    match entity {
403                                        EntityValue::Reference(reference) => {
404                                            let ref_string = Self::get_reference_value(reference);
405                                            buffer.push_str(&ref_string);
406                                        }
407                                        EntityValue::Value(val) => {
408                                            buffer.push_str(&val);
409                                        }
410                                        _ => {} // Handle other possible variants if needed.
411                                    }
412                                }
413                            }
414                        }
415                    }
416                    EntityValue::Value(buffer)
417                },
418            ),
419            map(
420                tuple((
421                    tag("\'"),
422                    opt(many1(alt((
423                        map(
424                            move |i| Reference::parse(i, cloned_entity_source2.clone()),
425                            EntityValue::Reference,
426                        ),
427                        map(
428                            fold_many1(
429                                map(is_not("%&'"), |s: &str| s.to_string()),
430                                String::new,
431                                |mut acc: String, item: String| {
432                                    acc.push_str(&item);
433                                    acc
434                                },
435                            ),
436                            EntityValue::Value,
437                        ),
438                    )))),
439                    tag("\'"),
440                )),
441                |(_, maybe_entities, _)| {
442                    let mut buffer = String::new();
443
444                    if let Some(entities) = maybe_entities {
445                        match entities.as_slice() {
446                            [EntityValue::Reference(_)] => return entities[0].clone(),
447                            _ => {
448                                for entity in entities {
449                                    match entity {
450                                        EntityValue::Reference(reference) => {
451                                            let ref_string = Self::get_reference_value(reference);
452                                            buffer.push_str(&ref_string);
453                                        }
454                                        EntityValue::Value(val) => {
455                                            buffer.push_str(&val);
456                                        }
457                                        _ => {} // Handle other possible variants if needed.
458                                    }
459                                }
460                            }
461                        }
462                    }
463                    EntityValue::Value(buffer)
464                },
465            ),
466        ))(input)
467    }
468
469    fn parse_comment(input: &str) -> IResult<&str, MarkupDeclaration> {
470        let (remaining, doc) = Document::parse_comment(input)?;
471        match doc {
472            Document::Comment(comment) => Ok((
473                remaining,
474                MarkupDeclaration::Comment(Document::Comment(comment)),
475            )),
476            e => {
477                eprintln!("{e:?}");
478                Err(nom::Err::Error(error::Error::NomError(
479                    nom::error::Error::new(
480                        "parse_comment` unexpected Document".to_string(),
481                        nom::error::ErrorKind::Verify,
482                    ),
483                )))
484            }
485        }
486    }
487    fn get_reference_value(reference: Reference) -> String {
488        match reference {
489            Reference::EntityRef(value) => value.local_part,
490            Reference::CharRef(value) => value,
491        }
492    }
493}