1use crate::{
2 namespaces::ParseNamespace,
3 parse::Parse,
4 prolog::subset::entity::{entity_value::EntityValue, EntitySource},
5 reference::{ParseReference, Reference},
6 IResult, Name,
7};
8use nom::{
9 branch::alt,
10 bytes::complete::{tag, take_till1},
11 character::complete::char,
12 combinator::{map, map_res, opt, value},
13 multi::{many0, separated_list1},
14 sequence::{delimited, pair, tuple},
15};
16use std::{cell::RefCell, collections::HashMap, rc::Rc};
17
/// Namespace prefix stored in [`Attribute::Namespace`].
#[derive(Clone, PartialEq, Eq)]
pub enum Prefix {
    /// Chosen by `Attribute::parse_attribute` when the parsed name's prefix is exactly `xmlns`.
    Default,
    /// Any other prefix, stored verbatim as parsed.
    Prefix(String),
}
/// The value side of an attribute.
#[derive(Clone, PartialEq, Eq)]
pub enum AttributeValue {
    /// Plain text. This is the variant `Attribute::parse_attvalue` returns on success.
    Value(String),
    /// A list of nested values.
    /// NOTE(review): never constructed in this file; presumably produced by reference
    /// normalization elsewhere in the crate — confirm.
    Values(Vec<AttributeValue>),
    /// Wraps a [`Reference`].
    /// NOTE(review): never constructed in this file; presumably an unresolved reference — confirm.
    Reference(Reference),
    /// NOTE(review): never constructed in this file; the name suggests an external reference
    /// with no replacement text — confirm against the code that produces it.
    EmptyExternalReference,
}
30
/// An attribute as it appears either in an attribute-list declaration or on an element.
#[derive(Clone, PartialEq, Eq)]
pub enum Attribute {
    /// A declared attribute, built by `Attribute::parse_definition` and
    /// `Attribute::parse_qualified_definition`.
    Definition {
        /// Declared attribute name.
        name: Name,
        /// Declared attribute type.
        att_type: AttType,
        /// Declared default (`#REQUIRED`, `#IMPLIED`, `#FIXED` value, or plain value).
        default_decl: DefaultDecl,
        /// The entity source that was passed to the definition parser.
        source: EntitySource,
    },
    /// Wraps a [`Reference`].
    /// NOTE(review): not constructed in this file — confirm where it is produced.
    Reference(Reference),
    /// A `name = value` pair, built by `Attribute::new`, `Attribute::parse`, and
    /// `Attribute::parse_attribute` (for names without a prefix).
    Instance {
        /// Attribute name.
        name: Name,
        /// Attribute value.
        value: AttributeValue,
    },
    /// NOTE(review): not constructed in this file; presumably mirrors `DefaultDecl::Required` — confirm.
    Required,
    /// NOTE(review): not constructed in this file; presumably mirrors `DefaultDecl::Implied` — confirm.
    Implied,
    /// Built by `Attribute::parse_attribute` whenever the parsed name carries a prefix.
    Namespace {
        /// `Prefix::Default` when the name's prefix is `xmlns`, otherwise the prefix itself.
        prefix: Prefix,
        /// The parsed attribute value.
        uri: AttributeValue,
    },
}
51
52impl<'a> Parse<'a> for Attribute {
53 type Args = (
54 Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
55 EntitySource,
56 );
57 type Output = IResult<&'a str, Self>;
58
59 fn parse(input: &'a str, args: Self::Args) -> Self::Output {
61 let (entity_references, entity_source) = args;
62 {
63 map(
64 tuple((Self::parse_name, Self::parse_eq, move |i| {
65 Self::parse_attvalue(i, entity_references.clone(), entity_source.clone())
66 })),
67 |(name, _eq, value)| Attribute::Instance { name, value },
68 )(input)
69 }
70 }
71}
72
// Empty impl: `Attribute` overrides nothing and relies on the default method
// bodies provided by `ParseNamespace`.
impl<'a> ParseNamespace<'a> for Attribute {}
74impl Attribute {
75 pub fn new(name: &str, value: &str) -> Self {
89 Attribute::Instance {
90 name: Name::new(None, name),
91 value: AttributeValue::Value(value.into()),
92 }
93 }
94
95 pub fn parse_definition(
97 input: &str,
98 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
99 entity_source: EntitySource,
100 ) -> IResult<&str, Attribute> {
101 map(
102 tuple((
103 Self::parse_multispace1,
104 Self::parse_name,
105 Self::parse_multispace1,
106 |i| AttType::parse(i, ()),
107 Self::parse_multispace1,
108 |i| DefaultDecl::parse(i, (entity_references.clone(), entity_source.clone())),
109 )),
110 |(_whitespace1, name, _whitespace2, att_type, _whitespace3, default_decl)| {
111 Attribute::Definition {
112 name,
113 att_type,
114 default_decl,
115 source: entity_source.clone(),
116 }
117 },
118 )(input)
119 }
120
121 pub fn parse_qualified_definition(
123 input: &str,
124 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
125 entity_source: EntitySource,
126 ) -> IResult<&str, Attribute> {
127 map(
128 tuple((
129 Self::parse_multispace1,
130 alt((
131 Self::parse_qualified_name,
132 Self::parse_namespace_attribute_name,
133 )),
134 Self::parse_multispace1,
135 |i| AttType::parse(i, ()),
136 Self::parse_multispace1,
137 |i| DefaultDecl::parse(i, (entity_references.clone(), entity_source.clone())),
138 )),
139 |(_whitespace1, name, _whtiespace2, att_type, _whtiespace3, default_decl)| {
140 Attribute::Definition {
141 name,
142 att_type,
143 default_decl,
144 source: entity_source.clone(),
145 }
146 },
147 )(input)
148 }
149
150 pub fn parse_attvalue(
152 input: &str,
153 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
154 entity_source: EntitySource,
155 ) -> IResult<&str, AttributeValue> {
156 match entity_source {
157 EntitySource::Internal | EntitySource::None => {
158 map(
159 alt((
160 delimited(
161 tag("\""),
162 many0(alt((
163 map(
164 take_till1(|c| c == '<' || c == '&' || c == '\"'),
165 |s: &str| AttributeValue::Value(s.into()),
166 ),
167 map(
168 |i| Reference::parse(i, entity_source.clone()),
169 |reference| {
170 reference.normalize_attribute(
171 entity_references.clone(),
172 entity_source.clone(),
173 )
174 },
175 ),
176 ))),
177 tag("\""),
178 ),
179 delimited(
180 tag("'"),
181 many0(alt((
182 map(
183 take_till1(|c| c == '<' || c == '&' || c == '\''),
184 |s: &str| AttributeValue::Value(s.into()),
185 ),
186 map(
187 |i| Reference::parse(i, entity_source.clone()),
188 |reference| {
189 reference.normalize_attribute(
190 entity_references.clone(),
191 entity_source.clone(),
192 )
193 },
194 ),
195 ))),
196 tag("'"),
197 ),
198 )),
199 |contents: Vec<AttributeValue>| {
200 let mut buffer = String::new();
201 for content in contents {
202 if let AttributeValue::Value(mut value) = content {
203 let mut chars: Vec<char> = value.chars().collect();
205 let mut i = 0;
206 while i < chars.len() {
207 if chars[i] == '\r' {
208 if i + 1 < chars.len() && chars[i + 1] == '\n' {
209 chars.remove(i);
210 } else {
211 chars[i] = '\n';
212 }
213 }
214 i += 1;
215 }
216 value = chars.into_iter().collect();
217 buffer.push_str(&value);
218 }
219 }
220
221 AttributeValue::Value(buffer)
222 },
223 )(input)
224 }
225
226 EntitySource::External => {
227 map(
228 many0(alt((
229 map(
230 |i| {
231 tuple((
232 |input| Reference::parse_parameter_reference(input),
233 Self::parse_multispace0,
234 ))(i)
235 },
236 |(reference, _whitespace)| {
237 reference.normalize_attribute(
238 entity_references.clone(),
239 entity_source.clone(),
240 )
241 },
242 ),
243 delimited(
244 tag("\""),
245 map(
246 many0(alt((
247 map(
248 take_till1(|c| c == '<' || c == '&' || c == '\"'),
249 |s: &str| AttributeValue::Value(s.into()),
250 ),
251 map(
252 |i| Reference::parse(i, entity_source.clone()),
253 |reference| {
254 reference.normalize_attribute(
255 entity_references.clone(),
256 entity_source.clone(),
257 )
258 },
259 ),
260 ))),
261 |values| {
262 let mut buffer = String::new();
263 for value in values {
264 if let AttributeValue::Value(v) = value {
265 buffer.push_str(&v);
266 }
267 }
268 AttributeValue::Value(buffer)
269 },
270 ),
271 tag("\""),
272 ),
273 delimited(
274 tag("'"),
275 map(
276 many0(alt((
277 map(
278 take_till1(|c| c == '<' || c == '&' || c == '\''),
279 |s: &str| AttributeValue::Value(s.into()),
280 ),
281 map(
282 |i| Reference::parse(i, entity_source.clone()),
283 |reference| {
284 reference.normalize_attribute(
285 entity_references.clone(),
286 entity_source.clone(),
287 )
288 },
289 ),
290 ))),
291 |values| {
292 let mut buffer = String::new();
293 for value in values {
294 if let AttributeValue::Value(v) = value {
295 buffer.push_str(&v);
296 }
297 }
298 AttributeValue::Value(buffer)
299 },
300 ),
301 tag("'"),
302 ),
303 ))),
304 |contents: Vec<AttributeValue>| {
305 let mut buffer = String::new();
306 for content in contents {
307 if let AttributeValue::Value(mut value) = content {
308 let mut chars: Vec<char> = value.chars().collect();
310 let mut i = 0;
311 while i < chars.len() {
312 if chars[i] == '\r' {
313 if i + 1 < chars.len() && chars[i + 1] == '\n' {
314 chars.remove(i);
315 } else {
316 chars[i] = '\n';
317 }
318 }
319 i += 1;
320 }
321 value = chars.into_iter().collect();
322 buffer.push_str(&value);
323 }
324 }
325
326 AttributeValue::Value(buffer)
327 },
328 )(input)
329 }
330 }
331 }
332
333 pub fn parse_attribute(
335 input: &str,
336 entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
337 entity_source: EntitySource,
338 ) -> IResult<&str, Attribute> {
339 map(
340 alt((
341 tuple((Self::parse_namespace_attribute_name, Self::parse_eq, |i| {
342 Attribute::parse_attvalue(i, entity_references.clone(), entity_source.clone())
343 })),
344 tuple((Self::parse_qualified_name, Self::parse_eq, |i| {
345 Self::parse_attvalue(i, entity_references.clone(), entity_source.clone())
346 })),
347 )),
348 |result| match result {
349 (name, _eq, value) if name.prefix.is_some() => {
350 let prefix = name.prefix.unwrap();
351
352 if &prefix == "xmlns" {
353 Attribute::Namespace {
354 prefix: Prefix::Default,
355 uri: value,
356 }
357 } else {
358 Attribute::Namespace {
359 prefix: Prefix::Prefix(prefix),
360 uri: value,
361 }
362 }
363 }
364 (Name { prefix, local_part }, _eq, value) => Attribute::Instance {
365 name: Name { prefix, local_part },
366 value,
367 },
368 },
369 )(input)
370 }
371}
372
/// Keyword attribute types recognised by `TokenizedType::parse`.
/// Each variant is produced by the literal keyword of the same spelling.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TokenizedType {
    /// Keyword `ID`.
    ID,
    /// Keyword `IDREF`.
    IDREF,
    /// Keyword `IDREFS`.
    IDREFS,
    /// Keyword `ENTITY`.
    ENTITY,
    /// Keyword `ENTITIES`.
    ENTITIES,
    /// Keyword `NMTOKEN`.
    NMTOKEN,
    /// Keyword `NMTOKENS`.
    NMTOKENS,
}
383
384impl TokenizedType {
385 fn parse(input: &str) -> IResult<&str, TokenizedType> {
387 alt((
388 value(TokenizedType::IDREFS, tag("IDREFS")),
389 value(TokenizedType::IDREF, tag("IDREF")),
390 value(TokenizedType::ID, tag("ID")),
391 value(TokenizedType::ENTITY, tag("ENTITY")),
392 value(TokenizedType::ENTITIES, tag("ENTITIES")),
393 value(TokenizedType::NMTOKENS, tag("NMTOKENS")),
394 value(TokenizedType::NMTOKEN, tag("NMTOKEN")),
395 ))(input)
396 }
397}
398
/// The declared type of an attribute, as parsed by `AttType::parse`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AttType {
    /// Produced by the keyword `CDATA`.
    CDATA,
    /// One of the [`TokenizedType`] keywords.
    Tokenized(TokenizedType),
    /// A parenthesised `|`-separated list. The parsers in this file set exactly one field to `Some`.
    Enumerated {
        /// `Some(names)` for the `NOTATION ( ... )` form, otherwise `None`.
        notation: Option<Vec<Name>>,
        /// `Some(tokens)` for the plain `( ... )` form, otherwise `None`.
        enumeration: Option<Vec<String>>,
    },
}
408
409impl<'a> Parse<'a> for AttType {
410 type Args = ();
411 type Output = IResult<&'a str, Self>;
412 fn parse(input: &'a str, _args: Self::Args) -> Self::Output {
414 let (input, att_type) = map(
415 alt((
416 value(AttType::CDATA, tag("CDATA")),
418 map(TokenizedType::parse, AttType::Tokenized),
420 Self::parse_enumerated_type,
421 )),
422 |parsed_att_type| parsed_att_type,
423 )(input)?;
424
425 Ok((input, att_type))
426 }
427}
428impl AttType {
429 fn parse_enumerated_type(input: &str) -> IResult<&str, AttType> {
431 alt((Self::parse_notation_type, Self::parse_enumeration))(input)
432 }
433
434 fn parse_notation_type(input: &str) -> IResult<&str, AttType> {
436 map(
437 tuple((
438 tag("NOTATION"),
439 Self::parse_multispace1,
440 delimited(
441 char('('),
442 delimited(
443 Self::parse_multispace0,
444 separated_list1(
445 delimited(Self::parse_multispace0, char('|'), Self::parse_multispace0),
446 Self::parse_name,
447 ),
448 Self::parse_multispace0,
449 ),
450 char(')'),
451 ),
452 )),
453 |(_notation_literal, _whitespace, names)| AttType::Enumerated {
454 notation: Some(names),
455 enumeration: None,
456 },
457 )(input)
458 }
459
460 fn parse_enumeration(input: &str) -> IResult<&str, AttType> {
462 map(
463 delimited(
464 char('('),
465 separated_list1(
466 tuple((Self::parse_multispace0, char('|'), Self::parse_multispace0)),
467 Self::parse_nmtoken,
468 ),
469 char(')'),
470 ),
471 |enumeration| AttType::Enumerated {
472 notation: None,
473 enumeration: Some(enumeration),
474 },
475 )(input)
476 }
477}
478
/// The default declaration of an attribute, as parsed by `DefaultDecl::parse`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DefaultDecl {
    /// Produced by the literal `#REQUIRED`.
    Required,
    /// Produced by the literal `#IMPLIED`.
    Implied,
    /// Produced by `#FIXED` followed by an attribute value; holds that value's text.
    Fixed(String),
    /// Produced by a bare attribute value; holds that value's text.
    Value(String),
}
486
487impl<'a> Parse<'a> for DefaultDecl {
488 type Args = (
489 Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
490 EntitySource,
491 );
492 type Output = IResult<&'a str, Self>;
493 fn parse(input: &'a str, args: Self::Args) -> Self::Output {
495 let (entity_references, entity_source) = args;
496 let cloned_entity_references = entity_references.clone();
497 let cloned_entity_source = entity_source.clone();
498 alt((
499 value(DefaultDecl::Required, tag("#REQUIRED")),
500 value(DefaultDecl::Implied, tag("#IMPLIED")),
501 map_res(
502 pair(
503 opt(tuple((tag("#FIXED"), Self::parse_multispace1))),
504 move |i| {
505 Attribute::parse_attvalue(
506 i,
507 cloned_entity_references.clone(),
508 cloned_entity_source.clone(),
509 )
510 },
511 ),
512 |(fixed, attvalue)| {
513 if let AttributeValue::Value(value) = attvalue {
514 match fixed {
515 Some(_) => Ok(DefaultDecl::Fixed(value)),
516 None => Ok(DefaultDecl::Value(value)),
517 }
518 } else {
519 Err(nom::Err::Failure(nom::error::Error::new(
520 format!("Failed to parse attvalue: {attvalue:?}"), nom::error::ErrorKind::Fail,
522 )))
523 }
524 },
525 ),
526 ))(input)
527 }
528}