nom_xml/prolog/subset/
mod.rs

1use std::{cell::RefCell, collections::HashMap, rc::Rc};
2
3use crate::{
4    config::ExternalEntityParseConfig, prolog::external_id::ExternalID, reference::Reference,
5    Document, IResult, Name,
6};
7
8use self::{
9    entity::entity_declaration::{EntityDecl, EntityDeclaration},
10    entity::entity_value::EntityValue,
11    entity::EntitySource,
12};
13
14pub mod entity;
15
16pub mod markup_declaration;
17
18use entity::entity_definition::EntityDefinition;
19use nom::{branch::alt, combinator::map, multi::many0};
20
21use crate::{
22    attribute::Attribute, namespaces::ParseNamespace, parse::Parse,
23    prolog::subset::markup_declaration::MarkupDeclaration, reference::ParseReference, Config,
24};
25
26//TODO handle circular references in all entity replacements
27#[derive(Clone, PartialEq, Eq)]
28pub enum Subset {
29    MarkupDecl(MarkupDeclaration),
30    DeclSep {
31        reference: Reference,
32        expansion: Option<Box<Subset>>,
33    },
34    None,
35}
36
37impl Subset {
38    pub fn get_entity(&self) -> Option<&EntityDeclaration> {
39        match self {
40            Subset::MarkupDecl(MarkupDeclaration::Entity(decl)) => match decl {
41                EntityDecl::General(general_decl) => Some(general_decl),
42                EntityDecl::Parameter(parameter_decl) => Some(parameter_decl),
43            },
44            _ => None,
45        }
46    }
47}
48
49impl<'a> ParseNamespace<'a> for Subset {}
50
51impl<'a> Parse<'a> for Subset {
52    type Args = (
53        Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
54        &'a Config,
55        EntitySource,
56    );
57    type Output = IResult<&'a str, Vec<Subset>>;
58
59    //[28b]	intSubset ::= (markupdecl | DeclSep)*
60    fn parse(input: &'a str, args: Self::Args) -> Self::Output {
61        let (entity_references, config, entity_source) = args;
62        let (input, parsed) = many0(alt((
63            |i| {
64                let (i, decl_sep) =
65                    Self::parse_decl_sep(i, entity_references.clone(), entity_source.clone())?;
66                match decl_sep {
67                    Some(decl_sep) => Ok((i, Some(decl_sep))),
68                    None => Ok((i, None)),
69                }
70            },
71            |i| {
72                let (i, result) = MarkupDeclaration::parse(
73                    i,
74                    (entity_references.clone(), entity_source.clone()),
75                )?;
76                match result {
77                    Some(markup_declaration) => {
78                        Ok((i, Some(Subset::MarkupDecl(markup_declaration))))
79                    }
80                    None => Err(nom::Err::Error(nom::error::make_error(
81                        input,
82                        nom::error::ErrorKind::Verify,
83                    ))),
84                }
85            },
86        )))(input)?;
87        let mut consolidated: Vec<Subset> = vec![];
88        let mut external_subsets: Vec<Subset> = vec![];
89        for mut subset in parsed.into_iter().flatten() {
90            match &mut subset {
91                Subset::MarkupDecl(markup_declaration) => match markup_declaration {
92                    MarkupDeclaration::Entity(entity) => {
93                        match entity {
94                            EntityDecl::Parameter(EntityDeclaration {
95                                name,
96                                entity_def:
97                                    EntityDefinition::External {
98                                        id: ExternalID::System(ext_file),
99                                        ..
100                                    },
101                                ..
102                            }) => {
103                                let Config {
104                                    external_parse_config:
105                                        ExternalEntityParseConfig {
106                                            allow_ext_parse,
107                                            base_directory,
108                                            ..
109                                        },
110                                } = config;
111                                if *allow_ext_parse {
112                                    let file_path = match base_directory {
113                                        Some(base) => format!("{}/{}", base, ext_file),
114                                        None => ext_file.clone(),
115                                    };
116                                    let _processed_external_entity =
117                                        Document::process_external_entity_file(
118                                            file_path,
119                                            name,
120                                            config,
121                                            entity_references.clone(),
122                                        );
123                                    if let Ok(Some(ext_subsets)) =
124                                        Document::get_external_entity_from_declaration(
125                                            entity.clone(),
126                                            entity_references.clone(),
127                                            config,
128                                        )
129                                    {
130                                        external_subsets.extend(ext_subsets.clone())
131                                    }
132                                }
133                            }
134                            _ => {
135                                if let Ok(Some(ext_subsets)) =
136                                    Document::get_external_entity_from_declaration(
137                                        entity.clone(),
138                                        entity_references.clone(),
139                                        config,
140                                    )
141                                {
142                                    consolidated.extend(ext_subsets);
143                                }
144                            }
145                        }
146
147                    }
148                    MarkupDeclaration::AttList {
149                        name,
150                        att_defs: Some(new_defs),
151                    } =>
152                    {
153                        if let Some(existing) = consolidated.iter_mut().find(|i| {
154                            matches!(i, Subset::MarkupDecl(MarkupDeclaration::AttList { name: existing_name, .. }) if existing_name == name)
155                        }) {
156                            if let Subset::MarkupDecl(MarkupDeclaration::AttList { att_defs: Some(existing_defs), .. }) = existing {
157                                existing_defs.extend(new_defs.clone());
158                            }
159                            continue
160                         }
161                    }
162                    _ => {
163                        // Do nothing. Unneeded? processing for other types
164                    }
165                },
166                Subset::DeclSep {
167                    reference: Reference::EntityRef(name),
168                    expansion,
169                } => {
170                    if let Some(EntityValue::MarkupDecl(inner_expansion)) = entity_references
171                        .borrow()
172                        .get(&(name.clone(), EntitySource::Internal))
173                    {
174                        let mut modified_inner_expansion = *inner_expansion.clone();
175
176                        if let MarkupDeclaration::AttList {
177                            att_defs: Some(ref mut defs),
178                            ..
179                        } = modified_inner_expansion
180                        {
181                            for attribute in defs {
182                                if let Attribute::Definition { ref mut source, .. } = attribute {
183                                    *source = EntitySource::Internal;
184                                }
185                            }
186                        }
187
188                        *expansion = Some(Box::new(Subset::MarkupDecl(
189                            modified_inner_expansion.clone(),
190                        )));
191                    }
192
193                    if let Some(entity_value) = entity_references
194                        .borrow()
195                        .get(&(name.clone(), EntitySource::External))
196                    {
197                        match entity_value {
198                        EntityValue::MarkupDecl(inner_expansion) => {
199                            let mut modified_inner_expansion = *inner_expansion.clone();
200                            if let MarkupDeclaration::AttList {
201                                att_defs: Some(ref mut defs),
202                                ..
203                            } = modified_inner_expansion
204                            {
205                                for attribute in defs {
206                                    if let Attribute::Definition { ref mut source, .. } = attribute {
207                                        *source = EntitySource::External;
208                                    }
209                                }
210                            }
211
212                            *expansion = Some(Box::new(Subset::MarkupDecl(
213                                modified_inner_expansion.clone(),
214                            )));
215                        },
216                        EntityValue::Document(_doc) => {
217                            for external_subset in &external_subsets {
218                                *expansion = Some(Box::new(external_subset.clone()));
219                            }
220
221                        },
222                        EntityValue::ParameterReference(_reference) => {
223                            unimplemented!("External EntityValue::ParameterReference encountered, needs implementation")
224                        },
225                        EntityValue::Reference(_reference) => {
226                            unimplemented!("External EntityValue::Reference encountered, needs implementation")
227                        },
228                        EntityValue::Value(_val) => {
229                            unimplemented!("External EntityValue::Value encountered, needs implementation")
230                        },
231
232                    }}
233
234
235                    if let Some(Subset::MarkupDecl(MarkupDeclaration::AttList {
236                        name,
237                        att_defs: Some(new_defs),
238                    })) = expansion.as_deref()
239                    {
240                        if let Some(Subset::MarkupDecl(MarkupDeclaration::AttList { att_defs: Some(existing_defs), .. })) = consolidated.iter_mut().find(|i| {
241                                matches!(i, Subset::MarkupDecl(MarkupDeclaration::AttList { name: existing_name, .. }) if existing_name == name)
242                            }) {
243                                existing_defs.extend(new_defs.clone());
244                                continue;
245                            }
246                        consolidated.push(Subset::MarkupDecl(MarkupDeclaration::AttList {
247                            name: name.clone(),
248                            att_defs: Some(new_defs.clone()),
249                        }));
250                    }
251                }
252                variant => {
253                        unimplemented!("Subset Variant unimplemented: {variant:#?}");
254                    }
255            }
256            consolidated.push(subset);
257        }
258        Ok((input, consolidated))
259    }
260}
261
262impl ParseDeclSep for Subset {
263    type Output = Option<Subset>;
264
265    // [28a] DeclSep ::=  PEReference | S
266    fn parse_decl_sep(
267        input: &str,
268        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
269        entity_source: EntitySource,
270    ) -> IResult<&str, Self::Output> {
271        let (input, decl_sep) = alt((
272            map(Reference::parse_parameter_reference, |reference| {
273                let expansion =
274                    Self::expand_entity(&reference, &entity_references, entity_source.clone());
275
276                let expanded_subset = match &expansion {
277                    Some(EntityValue::MarkupDecl(elem)) => Some(elem.clone()),
278                    _ => None,
279                };
280
281                Some(Subset::DeclSep {
282                    reference,
283                    expansion: expanded_subset.map(|subset| Box::new(Subset::MarkupDecl(*subset))),
284                })
285            }),
286            map(Self::parse_multispace1, |_| None),
287        ))(input)?;
288        Ok((input, decl_sep))
289    }
290}
291
292pub trait ParseDeclSep {
293    type Output;
294    // [28a] DeclSep ::=  PEReference | S
295    fn parse_decl_sep(
296        input: &str,
297        entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
298        entity_source: EntitySource,
299    ) -> IResult<&str, Self::Output>;
300    fn expand_entity(
301        reference: &Reference,
302        entity_references: &Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
303        entity_source: EntitySource,
304    ) -> Option<EntityValue> {
305        match reference {
306            Reference::EntityRef(name) => {
307                let entities = entity_references.borrow();
308                entities
309                    .get(&(name.clone(), entity_source.clone()))
310                    .cloned()
311            }
312            Reference::CharRef(_) => None,
313        }
314    }
315}