1use std::{cell::RefCell, collections::HashMap, rc::Rc};
2
3use nom::{
4 branch::alt,
5 bytes::complete::{is_not, tag},
6 character::complete::char,
7 combinator::{map, map_res, opt},
8 multi::{fold_many1, many0, many1},
9 sequence::tuple,
10};
11
12use crate::{
13 attribute::Attribute,
14 error,
15 namespaces::ParseNamespace,
16 parse::Parse,
17 processing_instruction::ProcessingInstruction,
18 prolog::{declaration_content::DeclarationContent, external_id::ExternalID, id::ID},
19 reference::Reference,
20 Document, IResult, Name,
21};
22
23use super::entity::{
24 entity_declaration::{EntityDecl, GeneralEntityDeclaration, ParameterEntityDeclaration},
25 entity_definition::EntityDefinition,
26 entity_value::EntityValue,
27 EntitySource,
28};
29
30#[derive(Clone, PartialEq, Eq)]
31pub enum MarkupDeclaration {
32 Element {
33 name: Name,
34 content_spec: Option<DeclarationContent>,
35 },
36 AttList {
37 name: Name,
38 att_defs: Option<Vec<Attribute>>,
39 },
40 Entity(EntityDecl),
41 Notation {
42 name: Name,
43 id: ID,
44 },
45 ProcessingInstruction(ProcessingInstruction),
46 Comment(Document),
47}
48impl<'a> ParseNamespace<'a> for MarkupDeclaration {}
49
50impl<'a> Parse<'a> for MarkupDeclaration {
51 type Args = (
52 Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
53 EntitySource,
54 );
55 type Output = IResult<&'a str, Option<MarkupDeclaration>>;
56 fn parse(input: &'a str, args: Self::Args) -> Self::Output {
58 let (entity_references, entity_source) = args;
59
60 let (input, res) = opt(alt((
61 Self::parse_element_declaration,
62 |i| {
63 Self::parse_attlist_declaration(i, entity_references.clone(), entity_source.clone())
64 },
65 |i| Self::parse_entity(i, entity_references.clone(), entity_source.clone()),
66 Self::parse_notation,
67 Self::parse_processing_instruction,
68 Self::parse_comment,
69 )))(input)?;
70 Ok((input, res))
71 }
72}
73
74impl MarkupDeclaration {
75 fn parse_element_declaration(input: &str) -> IResult<&str, MarkupDeclaration> {
78 let (
79 input,
80 (_element, _whitespace1, name, _whitespace2, content_spec, _whitespace, _close),
81 ) = tuple((
82 tag("<!ELEMENT"),
83 Self::parse_multispace1,
84 alt((Self::parse_name, Self::parse_qualified_name)),
85 Self::parse_multispace1,
86 |i| DeclarationContent::parse(i, ()),
87 Self::parse_multispace0,
88 tag(">"),
89 ))(input)?;
90
91 Ok((
92 input,
93 MarkupDeclaration::Element {
94 name,
95 content_spec: Some(content_spec),
96 },
97 ))
98 }
99
100 fn parse_notation(input: &str) -> IResult<&str, MarkupDeclaration> {
102 let (input, (_notation, _whitespace1, name, _whitespace2, id, _whitespace3, _close)) =
103 tuple((
104 tag("<!NOTATION"),
105 Self::parse_multispace1,
106 alt((Self::parse_name, Self::parse_qualified_name)),
107 Self::parse_multispace1,
108 |i| ID::parse(i, ()),
109 Self::parse_multispace0,
110 tag(">"),
111 ))(input)?;
112
113 Ok((input, MarkupDeclaration::Notation { name, id }))
114 }
115
116 fn parse_processing_instruction(input: &str) -> IResult<&str, MarkupDeclaration> {
117 let (input, processing_instruction) = ProcessingInstruction::parse(input, ())?;
118 Ok((
119 input,
120 MarkupDeclaration::ProcessingInstruction(processing_instruction),
121 ))
122 }
123 pub fn parse_attlist_declaration(
126 input: &str,
127 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
128 entity_source: EntitySource,
129 ) -> IResult<&str, MarkupDeclaration> {
130 let (input, (_start, _whitespace1, name, att_defs, _whitespace2, _close)) =
131 tuple((
132 tag("<!ATTLIST"),
133 Self::parse_multispace1,
134 alt((Self::parse_name, Self::parse_qualified_name)),
135 many0(|i| {
136 Attribute::parse_definition(i, entity_references.clone(), entity_source.clone())
137 }),
138 Self::parse_multispace0,
139 tag(">"),
140 ))(input)?;
141 Ok((
142 input,
143 MarkupDeclaration::AttList {
144 name,
145 att_defs: Some(att_defs),
146 },
147 ))
148 }
149
150 fn parse_entity(
152 input: &str,
153 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
154 entity_source: EntitySource,
155 ) -> IResult<&str, MarkupDeclaration> {
156 alt((
157 |i| {
158 Self::parse_general_entity_declaration(
159 i,
160 entity_references.clone(),
161 entity_source.clone(),
162 )
163 },
164 |i| {
165 Self::parse_parameter_entity_declaration(
166 i,
167 entity_references.clone(),
168 entity_source.clone(),
169 )
170 },
171 ))(input)
172 }
173
174 fn parse_general_entity_declaration(
176 input: &str,
177 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
178 entity_source: EntitySource,
179 ) -> IResult<&str, MarkupDeclaration> {
180 let (input, (_start, _whitespace1, name, _whitespace2)) = tuple((
181 tag("<!ENTITY"),
182 Self::parse_multispace1,
183 Self::parse_name,
184 Self::parse_multispace1,
185 ))(input)?;
186
187 let (input, (entity_def, _whitespace3, _close)) = tuple((
188 |i| {
189 Self::parse_entity_definition(
190 i,
191 name.clone(),
192 entity_references.clone(),
193 entity_source.clone(),
194 )
195 },
196 Self::parse_multispace0,
197 tag(">"),
198 ))(input)?;
199 Ok((
200 input,
201 MarkupDeclaration::Entity(EntityDecl::General(GeneralEntityDeclaration {
202 name,
203 entity_def,
204 })),
205 ))
206 }
207
208 fn parse_parameter_entity_declaration(
210 input: &str,
211 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
212 entity_source: EntitySource,
213 ) -> IResult<&str, MarkupDeclaration> {
214 let (input, (_start, _whitespace1, _percent, _whitespace2, name, _whitespace3)) =
215 tuple((
216 tag("<!ENTITY"),
217 Self::parse_multispace1,
218 tag("%"),
219 Self::parse_multispace1,
220 Self::parse_name,
221 Self::parse_multispace1,
222 ))(input)?;
223
224 let (input, (entity_def, _whitespace4, _close)) = tuple((
225 |i| {
226 Self::parse_parameter_definition(
227 i,
228 name.clone(),
229 entity_references.clone(),
230 entity_source.clone(),
231 )
232 },
233 Self::parse_multispace0,
234 tag(">"),
235 ))(input)?;
236
237 Ok((
238 input,
239 MarkupDeclaration::Entity(EntityDecl::Parameter(ParameterEntityDeclaration {
240 name,
241 entity_def,
242 })),
243 ))
244 }
245
246 fn parse_parameter_definition(
248 input: &str,
249 name: Name,
250 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
251 entity_source: EntitySource,
252 ) -> IResult<&str, EntityDefinition> {
253 alt((
254 map(
255 |i| {
256 Self::parse_entity_value(
257 i,
258 name.clone(),
259 entity_references.clone(),
260 entity_source.clone(),
261 )
262 },
263 EntityDefinition::EntityValue,
264 ),
265 map(
266 |i| ExternalID::parse(i, ()),
267 |id| EntityDefinition::External {
268 id,
269 n_data: None,
270 text_decl: None,
271 },
272 ),
273 ))(input)
274 }
275
276 fn parse_entity_definition(
278 input: &str,
279 name: Name,
280 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
281 entity_source: EntitySource,
282 ) -> IResult<&str, EntityDefinition> {
283 alt((
284 map(
285 |i| {
286 Self::parse_entity_value(
287 i,
288 name.clone(),
289 entity_references.clone(),
290 entity_source.clone(),
291 )
292 },
293 EntityDefinition::EntityValue,
294 ),
295 map(
296 tuple((
297 |i| ExternalID::parse(i, ()),
298 opt(Self::parse_ndata_declaration),
299 )),
300 |(id, n_data)| EntityDefinition::External {
301 id,
302 n_data,
303 text_decl: None,
304 },
305 ),
306 ))(input)
307 }
308
309 fn parse_ndata_declaration(input: &str) -> IResult<&str, Name> {
311 let (input, _) = Self::parse_multispace1(input)?;
312 let (input, _) = tag("NDATA")(input)?;
313 let (input, _) = Self::parse_multispace1(input)?;
314 let (input, name) = Self::parse_name(input)?;
315
316 Ok((input, name))
317 }
318 pub fn parse_entity_value(
320 input: &str,
321 name: Name,
322 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
323 entity_source: EntitySource,
324 ) -> IResult<&str, EntityValue> {
325 let cloned_references = entity_references.clone();
327 let cloned_references2 = entity_references.clone();
328
329 let cloned_entity_source = entity_source.clone();
330 let cloned_entity_source2 = entity_source.clone();
331 alt((
333 map(
334 tuple((
335 alt((char('\"'), char('\''))),
336 Self::capture_span(alt((
337 move |i| Document::parse_element(i, cloned_references.clone()),
338 Document::parse_cdata_section,
339 ))),
340 alt((char('\"'), char('\''))),
341 )),
342 |(_, (raw_entity_value, doc), _)| {
343 entity_references.borrow_mut().insert(
344 (name.clone(), EntitySource::Internal),
345 EntityValue::Document(doc),
346 );
347 EntityValue::Value(raw_entity_value.to_string())
348 },
349 ),
350 map_res(
351 tuple((
352 alt((char('\"'), char('\''))),
353 Self::capture_span(move |i| {
354 Self::parse(i, (cloned_references2.clone(), entity_source.clone()))
355 }),
356 alt((char('\"'), char('\''))),
357 )),
358 |(_, (raw_internal_subset, data), _)| match data {
359 Some(data) => {
360 entity_references.borrow_mut().insert(
361 (name.clone(), EntitySource::Internal),
362 EntityValue::MarkupDecl(Box::new(data)),
363 );
364
365 Ok(EntityValue::Value(raw_internal_subset.to_string()))
366 }
367 None => Err(nom::Err::Failure((
368 "No Internal Subset",
369 nom::error::ErrorKind::Fail,
370 ))),
371 },
372 ),
373 map(
374 tuple((
375 tag("\""),
376 opt(many1(alt((
377 map(
378 move |i| Reference::parse(i, cloned_entity_source.clone()),
379 EntityValue::Reference,
380 ),
381 map(
382 fold_many1(
383 map(is_not("%&\""), |s: &str| s.to_string()),
384 String::new,
385 |mut acc: String, item: String| {
386 acc.push_str(&item);
387 acc
388 },
389 ),
390 EntityValue::Value,
391 ),
392 )))),
393 tag("\""),
394 )),
395 |(_, maybe_entities, _)| {
396 let mut buffer = String::new();
397 if let Some(entities) = maybe_entities {
398 match entities.as_slice() {
399 [EntityValue::Reference(_)] => return entities[0].clone(),
400 _ => {
401 for entity in entities {
402 match entity {
403 EntityValue::Reference(reference) => {
404 let ref_string = Self::get_reference_value(reference);
405 buffer.push_str(&ref_string);
406 }
407 EntityValue::Value(val) => {
408 buffer.push_str(&val);
409 }
410 _ => {} }
412 }
413 }
414 }
415 }
416 EntityValue::Value(buffer)
417 },
418 ),
419 map(
420 tuple((
421 tag("\'"),
422 opt(many1(alt((
423 map(
424 move |i| Reference::parse(i, cloned_entity_source2.clone()),
425 EntityValue::Reference,
426 ),
427 map(
428 fold_many1(
429 map(is_not("%&'"), |s: &str| s.to_string()),
430 String::new,
431 |mut acc: String, item: String| {
432 acc.push_str(&item);
433 acc
434 },
435 ),
436 EntityValue::Value,
437 ),
438 )))),
439 tag("\'"),
440 )),
441 |(_, maybe_entities, _)| {
442 let mut buffer = String::new();
443
444 if let Some(entities) = maybe_entities {
445 match entities.as_slice() {
446 [EntityValue::Reference(_)] => return entities[0].clone(),
447 _ => {
448 for entity in entities {
449 match entity {
450 EntityValue::Reference(reference) => {
451 let ref_string = Self::get_reference_value(reference);
452 buffer.push_str(&ref_string);
453 }
454 EntityValue::Value(val) => {
455 buffer.push_str(&val);
456 }
457 _ => {} }
459 }
460 }
461 }
462 }
463 EntityValue::Value(buffer)
464 },
465 ),
466 ))(input)
467 }
468
469 fn parse_comment(input: &str) -> IResult<&str, MarkupDeclaration> {
470 let (remaining, doc) = Document::parse_comment(input)?;
471 match doc {
472 Document::Comment(comment) => Ok((
473 remaining,
474 MarkupDeclaration::Comment(Document::Comment(comment)),
475 )),
476 e => {
477 eprintln!("{e:?}");
478 Err(nom::Err::Error(error::Error::NomError(
479 nom::error::Error::new(
480 "parse_comment` unexpected Document".to_string(),
481 nom::error::ErrorKind::Verify,
482 ),
483 )))
484 }
485 }
486 }
487 fn get_reference_value(reference: Reference) -> String {
488 match reference {
489 Reference::EntityRef(value) => value.local_part,
490 Reference::CharRef(value) => value,
491 }
492 }
493}