nom_xml/prolog/
doctype.rs

1use crate::{
2    namespaces::ParseNamespace,
3    parse::Parse,
4    prolog::subset::{
5        entity::{
6            entity_declaration::EntityDecl, entity_definition::EntityDefinition,
7            entity_value::EntityValue,
8        },
9        Subset,
10    },
11    Config, IResult, Name,
12};
13use nom::{
14    bytes::complete::tag,
15    combinator::opt,
16    sequence::{delimited, pair, preceded, tuple},
17};
18use std::{cell::RefCell, collections::HashMap, rc::Rc};
19
20use super::{
21    external_id::ExternalID,
22    subset::{entity::EntitySource, markup_declaration::MarkupDeclaration},
23};
24
25#[derive(Clone, PartialEq, Eq)]
26pub struct DocType {
27    pub name: Name,
28    pub external_id: Option<ExternalID>,
29    pub subset: Option<Vec<Subset>>,
30}
31
32impl<'a> Parse<'a> for DocType {
33    type Args = (
34        Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
35        &'a Config,
36    );
37
38    type Output = IResult<&'a str, Self>;
39
40    fn parse(input: &'a str, args: Self::Args) -> Self::Output {
41        let (entity_references, config) = args;
42        let mut merged_subsets = vec![];
43        let (input, (_open_tag, _whitespace1, name, external_id, _whitespace2)) = tuple((
44            tag("<!DOCTYPE"),
45            Self::parse_multispace1,
46            Self::parse_name,
47            opt(preceded(Self::parse_multispace1, |i| {
48                ExternalID::parse(i, ())
49            })),
50            Self::parse_multispace0,
51        ))(input)?;
52        if let Some(external_id) = external_id {
53            let mut external_subsets = match external_id.get_external_entity_from_id(
54                input,
55                entity_references.clone(),
56                config,
57            ) {
58                Ok(subsets) => subsets,
59                Err(_) => None,
60            };
61            let (input, (mut subset, _whitespace3, _close_tag, _whitespace4)) =
62                tuple((
63                    opt(delimited(
64                        pair(tag("["), Self::parse_multispace0),
65                        |i| {
66                            Subset::parse(
67                                i,
68                                (entity_references.clone(), config, EntitySource::External),
69                            )
70                        },
71                        pair(Self::parse_multispace0, tag("]")),
72                    )),
73                    Self::parse_multispace0,
74                    tag(">"),
75                    Self::parse_multispace0,
76                ))(input)?;
77            if let Some(subset) = &mut subset {
78                subset.iter_mut().for_each(|subset| match subset {
79                    Subset::MarkupDecl(MarkupDeclaration::Entity(EntityDecl::General(
80                        entity_decl,
81                    )))
82                    | Subset::MarkupDecl(MarkupDeclaration::Entity(EntityDecl::Parameter(
83                        entity_decl,
84                    ))) => {
85                        if let EntityDefinition::EntityValue(EntityValue::Reference(ref_val)) =
86                            &mut entity_decl.entity_def
87                        {
88                            ref_val.normalize_entity(entity_references.clone());
89                        }
90                    }
91
92                    _ => {}
93                });
94                merged_subsets.extend(subset.clone());
95            }
96            if let Some(subset) = &mut external_subsets {
97                subset.iter_mut().for_each(|subset| match subset {
98                    Subset::MarkupDecl(MarkupDeclaration::Entity(EntityDecl::General(
99                        entity_decl,
100                    )))
101                    | Subset::MarkupDecl(MarkupDeclaration::Entity(EntityDecl::Parameter(
102                        entity_decl,
103                    ))) => {
104                        if let EntityDefinition::EntityValue(EntityValue::Reference(ref_val)) =
105                            &mut entity_decl.entity_def
106                        {
107                            ref_val.normalize_entity(entity_references.clone());
108                        }
109                    }
110                    _ => {}
111                });
112                merged_subsets.extend(subset.clone());
113            }
114            // we need to create a subsets that merges external subsets with subset
115            if merged_subsets.is_empty() {
116                Ok((
117                    input,
118                    Self {
119                        name,
120                        external_id: Some(external_id),
121                        subset: None,
122                    },
123                ))
124            } else {
125                Ok((
126                    input,
127                    Self {
128                        name,
129                        external_id: Some(external_id),
130                        subset: Some(merged_subsets),
131                    },
132                ))
133            }
134        } else {
135            let (input, (mut subset, _whitespace3, _close_tag, _whitespace4)) =
136                tuple((
137                    opt(delimited(
138                        pair(tag("["), Self::parse_multispace0),
139                        |i| {
140                            Subset::parse(
141                                i,
142                                (entity_references.clone(), config, EntitySource::Internal),
143                            )
144                        },
145                        pair(Self::parse_multispace0, tag("]")),
146                    )),
147                    Self::parse_multispace0,
148                    tag(">"),
149                    Self::parse_multispace0,
150                ))(input)?;
151            if let Some(subset) = &mut subset {
152                subset.iter_mut().for_each(|subset| {
153                    match subset {
154                        //match internal_subset {
155                        Subset::MarkupDecl(MarkupDeclaration::Entity(EntityDecl::General(
156                            entity_decl,
157                        )))
158                        | Subset::MarkupDecl(MarkupDeclaration::Entity(EntityDecl::Parameter(
159                            entity_decl,
160                        ))) => {
161                            if let EntityDefinition::EntityValue(EntityValue::Reference(ref_val)) =
162                                &mut entity_decl.entity_def
163                            {
164                                ref_val.normalize_entity(entity_references.clone());
165                            }
166                        }
167
168                        _ => {}
169                    }
170                });
171            }
172            Ok((
173                input,
174                Self {
175                    name,
176                    external_id,
177                    subset,
178                },
179            ))
180        }
181    }
182}
183
184//TODO integrate this
185impl DocType {
186    pub fn extract_entities(&self) -> Option<Vec<Box<Subset>>> {
187        let entities: Vec<_> = self
188            .subset
189            .as_ref()?
190            .iter()
191            .filter_map(|item| {
192                if let Subset::MarkupDecl(MarkupDeclaration::Entity(_)) = item {
193                    Some(Box::new(item.clone()))
194                } else {
195                    None
196                }
197            })
198            .collect();
199
200        if entities.is_empty() {
201            None
202        } else {
203            Some(entities)
204        }
205    }
206    //TODO: figure out how to integrate this or remove
207    // fn _parse_qualified_doctype(
208    //     input: &str,
209    //     entity_references: Rc<RefCell<HashMap<(Name,EntityType), EntityValue>>>,
210    // ) -> IResult<&str, DocType> {
211    //     let (input, _) = tag("<!DOCTYPE")(input)?;
212    //     let (input, _) = Self::parse_multispace1(input)?;
213    //     let (input, name) = Self::parse_qualified_name(input)?;
214
215    //     let (input, external_id) = opt(preceded(Self::parse_multispace1, |i| {
216    //         ExternalID::parse(i, ())
217    //     }))(input)?;
218
219    //     let (input, _) = Self::parse_multispace0(input)?;
220
221    //     let (input, int_subset) = opt(delimited(
222    //         pair(tag("["), Self::parse_multispace0),
223    //         |i| InternalSubset::parse(i, entity_references.clone()),
224    //         pair(Self::parse_multispace0, tag("]")),
225    //     ))(input)?;
226
227    //     let (input, _) = Self::parse_multispace0(input)?;
228    //     let (input, _) = tag(">")(input)?;
229    //     let (input, _) = Self::parse_multispace0(input)?;
230
231    //     Ok((
232    //         input,
233    //         Self {
234    //             name,
235    //             external_id,
236    //             int_subset,
237    //         },
238    //     ))
239    // }
240}
241
242impl<'a> ParseNamespace<'a> for DocType {}