quick_xml/events/attributes.rs
1//! Xml Attributes module
2//!
3//! Provides an iterator over attributes key/value pairs
4
5use crate::encoding::Decoder;
6use crate::errors::Result as XmlResult;
7use crate::escape::{escape, resolve_predefined_entity, unescape_with};
8use crate::name::{LocalName, Namespace, NamespaceResolver, QName};
9use crate::utils::{is_whitespace, Bytes};
10
11use std::fmt::{self, Debug, Display, Formatter};
12use std::iter::FusedIterator;
13use std::{borrow::Cow, ops::Range};
14
15/// A struct representing a key/value XML attribute.
16///
17/// Field `value` stores raw bytes, possibly containing escape-sequences. Most users will likely
18/// want to access the value using one of the [`unescape_value`] and [`decode_and_unescape_value`]
19/// functions.
20///
21/// [`unescape_value`]: Self::unescape_value
22/// [`decode_and_unescape_value`]: Self::decode_and_unescape_value
23#[derive(Clone, Eq, PartialEq)]
24pub struct Attribute<'a> {
25 /// The key to uniquely define the attribute.
26 ///
27 /// If [`Attributes::with_checks`] is turned off, the key might not be unique.
28 pub key: QName<'a>,
29 /// The raw value of the attribute.
30 pub value: Cow<'a, [u8]>,
31}
32
33impl<'a> Attribute<'a> {
34 /// Decodes using UTF-8 then unescapes the value.
35 ///
36 /// This is normally the value you are interested in. Escape sequences such as `>` are
37 /// replaced with their unescaped equivalents such as `>`.
38 ///
39 /// This will allocate if the value contains any escape sequences.
40 ///
41 /// See also [`unescape_value_with()`](Self::unescape_value_with)
42 ///
43 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
44 ///
45 /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
46 /// should only be used by applications.
47 /// Libs should use [`decode_and_unescape_value()`](Self::decode_and_unescape_value)
48 /// instead, because if lib will be used in a project which depends on quick_xml with
49 /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
50 ///
51 /// </div>
52 ///
53 /// [`encoding`]: ../../index.html#encoding
54 /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
55 #[cfg(any(doc, not(feature = "encoding")))]
56 pub fn unescape_value(&self) -> XmlResult<Cow<'a, str>> {
57 self.unescape_value_with(resolve_predefined_entity)
58 }
59
60 /// Decodes using UTF-8 then unescapes the value, using custom entities.
61 ///
62 /// This is normally the value you are interested in. Escape sequences such as `>` are
63 /// replaced with their unescaped equivalents such as `>`.
64 /// A fallback resolver for additional custom entities can be provided via
65 /// `resolve_entity`.
66 ///
67 /// This will allocate if the value contains any escape sequences.
68 ///
69 /// See also [`unescape_value()`](Self::unescape_value)
70 ///
71 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
72 ///
73 /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
74 /// should only be used by applications.
75 /// Libs should use [`decode_and_unescape_value_with()`](Self::decode_and_unescape_value_with)
76 /// instead, because if lib will be used in a project which depends on quick_xml with
77 /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
78 ///
79 /// </div>
80 ///
81 /// [`encoding`]: ../../index.html#encoding
82 /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
83 #[cfg(any(doc, not(feature = "encoding")))]
84 #[inline]
85 pub fn unescape_value_with<'entity>(
86 &self,
87 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
88 ) -> XmlResult<Cow<'a, str>> {
89 self.decode_and_unescape_value_with(Decoder::utf8(), resolve_entity)
90 }
91
92 /// Decodes then unescapes the value.
93 ///
94 /// This will allocate if the value contains any escape sequences or in
95 /// non-UTF-8 encoding.
96 pub fn decode_and_unescape_value(&self, decoder: Decoder) -> XmlResult<Cow<'a, str>> {
97 self.decode_and_unescape_value_with(decoder, resolve_predefined_entity)
98 }
99
100 /// Decodes then unescapes the value with custom entities.
101 ///
102 /// This will allocate if the value contains any escape sequences or in
103 /// non-UTF-8 encoding.
104 pub fn decode_and_unescape_value_with<'entity>(
105 &self,
106 decoder: Decoder,
107 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
108 ) -> XmlResult<Cow<'a, str>> {
109 let decoded = decoder.decode_cow(&self.value)?;
110
111 match unescape_with(&decoded, resolve_entity)? {
112 // Because result is borrowed, no replacements was done and we can use original string
113 Cow::Borrowed(_) => Ok(decoded),
114 Cow::Owned(s) => Ok(s.into()),
115 }
116 }
117
118 /// If attribute value [represents] valid boolean values, returns `Some`, otherwise returns `None`.
119 ///
120 /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`.
121 ///
122 /// # Examples
123 ///
124 /// ```
125 /// # use pretty_assertions::assert_eq;
126 /// use quick_xml::events::attributes::Attribute;
127 ///
128 /// let attr = Attribute::from(("attr", "false"));
129 /// assert_eq!(attr.as_bool(), Some(false));
130 ///
131 /// let attr = Attribute::from(("attr", "0"));
132 /// assert_eq!(attr.as_bool(), Some(false));
133 ///
134 /// let attr = Attribute::from(("attr", "true"));
135 /// assert_eq!(attr.as_bool(), Some(true));
136 ///
137 /// let attr = Attribute::from(("attr", "1"));
138 /// assert_eq!(attr.as_bool(), Some(true));
139 ///
140 /// let attr = Attribute::from(("attr", "bot bool"));
141 /// assert_eq!(attr.as_bool(), None);
142 /// ```
143 ///
144 /// [represents]: https://www.w3.org/TR/xmlschema11-2/#boolean
145 #[inline]
146 pub fn as_bool(&self) -> Option<bool> {
147 match self.value.as_ref() {
148 b"1" | b"true" => Some(true),
149 b"0" | b"false" => Some(false),
150 _ => None,
151 }
152 }
153}
154
155impl<'a> Debug for Attribute<'a> {
156 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
157 f.debug_struct("Attribute")
158 .field("key", &Bytes(self.key.as_ref()))
159 .field("value", &Bytes(&self.value))
160 .finish()
161 }
162}
163
164impl<'a> From<(&'a [u8], &'a [u8])> for Attribute<'a> {
165 /// Creates new attribute from raw bytes.
166 /// Does not apply any transformation to both key and value.
167 ///
168 /// # Examples
169 ///
170 /// ```
171 /// # use pretty_assertions::assert_eq;
172 /// use quick_xml::events::attributes::Attribute;
173 ///
174 /// let features = Attribute::from(("features".as_bytes(), "Bells & whistles".as_bytes()));
175 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
176 /// ```
177 fn from(val: (&'a [u8], &'a [u8])) -> Attribute<'a> {
178 Attribute {
179 key: QName(val.0),
180 value: Cow::from(val.1),
181 }
182 }
183}
184
185impl<'a> From<(&'a str, &'a str)> for Attribute<'a> {
186 /// Creates new attribute from text representation.
187 /// Key is stored as-is, but the value will be escaped.
188 ///
189 /// # Examples
190 ///
191 /// ```
192 /// # use pretty_assertions::assert_eq;
193 /// use quick_xml::events::attributes::Attribute;
194 ///
195 /// let features = Attribute::from(("features", "Bells & whistles"));
196 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
197 /// ```
198 fn from(val: (&'a str, &'a str)) -> Attribute<'a> {
199 Attribute {
200 key: QName(val.0.as_bytes()),
201 value: match escape(val.1) {
202 Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
203 Cow::Owned(s) => Cow::Owned(s.into_bytes()),
204 },
205 }
206 }
207}
208
209impl<'a> From<(&'a str, Cow<'a, str>)> for Attribute<'a> {
210 /// Creates new attribute from text representation.
211 /// Key is stored as-is, but the value will be escaped.
212 ///
213 /// # Examples
214 ///
215 /// ```
216 /// # use std::borrow::Cow;
217 /// use pretty_assertions::assert_eq;
218 /// use quick_xml::events::attributes::Attribute;
219 ///
220 /// let features = Attribute::from(("features", Cow::Borrowed("Bells & whistles")));
221 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
222 /// ```
223 fn from(val: (&'a str, Cow<'a, str>)) -> Attribute<'a> {
224 Attribute {
225 key: QName(val.0.as_bytes()),
226 value: match escape(val.1) {
227 Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
228 Cow::Owned(s) => Cow::Owned(s.into_bytes()),
229 },
230 }
231 }
232}
233
234impl<'a> From<Attr<&'a [u8]>> for Attribute<'a> {
235 #[inline]
236 fn from(attr: Attr<&'a [u8]>) -> Self {
237 Self {
238 key: attr.key(),
239 value: Cow::Borrowed(attr.value()),
240 }
241 }
242}
243
244////////////////////////////////////////////////////////////////////////////////////////////////////
245
246/// Iterator over XML attributes.
247///
248/// Yields `Result<Attribute>`. An `Err` will be yielded if an attribute is malformed or duplicated.
249/// The duplicate check can be turned off by calling [`with_checks(false)`].
250///
251/// When [`serialize`] feature is enabled, can be converted to serde's deserializer.
252///
253/// [`with_checks(false)`]: Self::with_checks
254/// [`serialize`]: ../../index.html#serialize
255#[derive(Clone)]
256pub struct Attributes<'a> {
257 /// Slice of `BytesStart` corresponding to attributes
258 bytes: &'a [u8],
259 /// Iterator state, independent from the actual source of bytes
260 state: IterState,
261 /// Encoding used for `bytes`
262 decoder: Decoder,
263}
264
265impl<'a> Attributes<'a> {
266 /// Internal constructor, used by `BytesStart`. Supplies data in reader's encoding
267 #[inline]
268 pub(crate) const fn wrap(buf: &'a [u8], pos: usize, html: bool, decoder: Decoder) -> Self {
269 Self {
270 bytes: buf,
271 state: IterState::new(pos, html),
272 decoder,
273 }
274 }
275
276 /// Creates a new attribute iterator from a buffer, which recognizes only XML-style
277 /// attributes, i. e. those which in the form `name = "value"` or `name = 'value'`.
278 /// HTML style attributes (i. e. without quotes or only name) will return a error.
279 ///
280 /// # Parameters
281 /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
282 /// string between `<` and `>` (or `/>`) of a tag;
283 /// - `pos`: a position in the `buf` where tag name is finished and attributes
284 /// is started. It is not necessary to point exactly to the end of a tag name,
285 /// although that is usually that. If it will be more than the `buf` length,
286 /// then the iterator will return `None`` immediately.
287 ///
288 /// # Example
289 /// ```
290 /// # use quick_xml::events::attributes::{Attribute, Attributes};
291 /// # use pretty_assertions::assert_eq;
292 /// #
293 /// let mut iter = Attributes::new("tag-name attr1 = 'value1' attr2='value2' ", 9);
294 /// // ^0 ^9
295 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
296 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "value2")))));
297 /// assert_eq!(iter.next(), None);
298 /// ```
299 pub const fn new(buf: &'a str, pos: usize) -> Self {
300 Self::wrap(buf.as_bytes(), pos, false, Decoder::utf8())
301 }
302
303 /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax.
304 ///
305 /// # Parameters
306 /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
307 /// string between `<` and `>` (or `/>`) of a tag;
308 /// - `pos`: a position in the `buf` where tag name is finished and attributes
309 /// is started. It is not necessary to point exactly to the end of a tag name,
310 /// although that is usually that. If it will be more than the `buf` length,
311 /// then the iterator will return `None`` immediately.
312 ///
313 /// # Example
314 /// ```
315 /// # use quick_xml::events::attributes::{Attribute, Attributes};
316 /// # use pretty_assertions::assert_eq;
317 /// #
318 /// let mut iter = Attributes::html("tag-name attr1 = value1 attr2 ", 9);
319 /// // ^0 ^9
320 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
321 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "")))));
322 /// assert_eq!(iter.next(), None);
323 /// ```
324 pub const fn html(buf: &'a str, pos: usize) -> Self {
325 Self::wrap(buf.as_bytes(), pos, true, Decoder::utf8())
326 }
327
328 /// Changes whether attributes should be checked for uniqueness.
329 ///
330 /// The XML specification requires attribute keys in the same element to be unique. This check
331 /// can be disabled to improve performance slightly.
332 ///
333 /// (`true` by default)
334 pub fn with_checks(&mut self, val: bool) -> &mut Attributes<'a> {
335 self.state.check_duplicates = val;
336 self
337 }
338
339 /// Checks if the current tag has a [`xsi:nil`] attribute. This method ignores any errors in
340 /// attributes.
341 ///
342 /// # Examples
343 ///
344 /// ```
345 /// # use pretty_assertions::assert_eq;
346 /// use quick_xml::events::Event;
347 /// use quick_xml::name::QName;
348 /// use quick_xml::reader::NsReader;
349 ///
350 /// let mut reader = NsReader::from_str("
351 /// <root xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>
352 /// <true xsi:nil='true'/>
353 /// <false xsi:nil='false'/>
354 /// <none/>
355 /// <non-xsi xsi:nil='true' xmlns:xsi='namespace'/>
356 /// <unbound-nil nil='true' xmlns='http://www.w3.org/2001/XMLSchema-instance'/>
357 /// <another-xmlns f:nil='true' xmlns:f='http://www.w3.org/2001/XMLSchema-instance'/>
358 /// </root>
359 /// ");
360 /// reader.config_mut().trim_text(true);
361 ///
362 /// macro_rules! check {
363 /// ($reader:expr, $name:literal, $value:literal) => {
364 /// let event = match $reader.read_event().unwrap() {
365 /// Event::Empty(e) => e,
366 /// e => panic!("Unexpected event {:?}", e),
367 /// };
368 /// assert_eq!(
369 /// (event.name(), event.attributes().has_nil($reader.resolver())),
370 /// (QName($name.as_bytes()), $value),
371 /// );
372 /// };
373 /// }
374 ///
375 /// let root = match reader.read_event().unwrap() {
376 /// Event::Start(e) => e,
377 /// e => panic!("Unexpected event {:?}", e),
378 /// };
379 /// assert_eq!(root.attributes().has_nil(reader.resolver()), false);
380 ///
381 /// // definitely true
382 /// check!(reader, "true", true);
383 /// // definitely false
384 /// check!(reader, "false", false);
385 /// // absence of the attribute means that attribute is not set
386 /// check!(reader, "none", false);
387 /// // attribute not bound to the correct namespace
388 /// check!(reader, "non-xsi", false);
389 /// // attributes without prefix not bound to any namespace
390 /// check!(reader, "unbound-nil", false);
391 /// // prefix can be any while it is bound to the correct namespace
392 /// check!(reader, "another-xmlns", true);
393 /// ```
394 ///
395 /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil
396 pub fn has_nil(&mut self, resolver: &NamespaceResolver) -> bool {
397 use crate::name::ResolveResult::*;
398
399 self.any(|attr| {
400 if let Ok(attr) = attr {
401 match resolver.resolve_attribute(attr.key) {
402 (
403 Bound(Namespace(b"http://www.w3.org/2001/XMLSchema-instance")),
404 LocalName(b"nil"),
405 ) => attr.as_bool().unwrap_or_default(),
406 _ => false,
407 }
408 } else {
409 false
410 }
411 })
412 }
413
414 /// Get the decoder, used to decode bytes, read by the reader which produces
415 /// this iterator, to the strings.
416 ///
417 /// When iterator was created manually or get from a manually created [`BytesStart`],
418 /// encoding is UTF-8.
419 ///
420 /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
421 /// defaults to UTF-8.
422 ///
423 /// [`BytesStart`]: crate::events::BytesStart
424 /// [`encoding`]: ../index.html#encoding
425 #[inline]
426 pub const fn decoder(&self) -> Decoder {
427 self.decoder
428 }
429}
430
431impl<'a> Debug for Attributes<'a> {
432 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
433 f.debug_struct("Attributes")
434 .field("bytes", &Bytes(self.bytes))
435 .field("state", &self.state)
436 .field("decoder", &self.decoder)
437 .finish()
438 }
439}
440
441impl<'a> Iterator for Attributes<'a> {
442 type Item = Result<Attribute<'a>, AttrError>;
443
444 #[inline]
445 fn next(&mut self) -> Option<Self::Item> {
446 match self.state.next(self.bytes) {
447 None => None,
448 Some(Ok(a)) => Some(Ok(a.map(|range| &self.bytes[range]).into())),
449 Some(Err(e)) => Some(Err(e)),
450 }
451 }
452}
453
454impl<'a> FusedIterator for Attributes<'a> {}
455
456////////////////////////////////////////////////////////////////////////////////////////////////////
457
/// Errors that can be raised while parsing attributes.
///
/// The recovery position in the examples shows the position from which parsing
/// of the next attribute will be attempted.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AttrError {
    /// Attribute key was not followed by `=`. The position relative to the
    /// start of the owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key another="attribute"/>
    /// <!--     ^~~ error position, recovery position (8) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    ExpectedEq(usize),
    /// Attribute value was not found after `=`. The position relative to the
    /// start of the owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = />
    /// <!--       ^~~ error position, recovery position (10) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because otherwise any content after `=` will be treated as a value.
    /// The XML
    ///
    /// ```xml
    /// <tag key = another-key = "value"/>
    /// <!--                   ^ ^- recovery position (24) -->
    /// <!--                   '~~ error position (22) -->
    /// ```
    ///
    /// will be treated as `Attribute { key = b"key", value = b"another-key" }`,
    /// and either an [`Attribute`] is returned or [`AttrError::UnquotedValue`]
    /// is raised, depending on the parsing mode.
    ExpectedValue(usize),
    /// Attribute value is not quoted. The position relative to the start of the
    /// owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = value />
    /// <!--       ^    ^~~ recovery position (15) -->
    /// <!--       '~~ error position (10) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    UnquotedValue(usize),
    /// Attribute value was not finished with a matching quote. The position
    /// relative to the start of the owning tag and the expected quote are
    /// provided. That position is always the last character in the tag content.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = "value />
    /// <tag key = 'value />
    /// <!--               ^~~ error position, recovery position (18) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because all input was consumed while scanning for a quote.
    ExpectedQuote(usize, u8),
    /// An attribute with the same name was already encountered. The two
    /// parameters are (1) the error position relative to the start of the
    /// owning tag for the new attribute and (2) the start position of the
    /// previously encountered attribute with the same name.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = 'value'  key="value2" attr3='value3' />
    /// <!-- ^              ^            ^~~ recovery position (32) -->
    /// <!-- |              '~~ error position (19) -->
    /// <!-- '~~ previous position (4) -->
    /// ```
    ///
    /// This error is returned only when [`Attributes::with_checks()`] is set
    /// to `true` (which is the default behavior).
    Duplicated(usize, usize),
}
546
547impl Display for AttrError {
548 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
549 match self {
550 Self::ExpectedEq(pos) => write!(
551 f,
552 r#"position {}: attribute key must be directly followed by `=` or space"#,
553 pos
554 ),
555 Self::ExpectedValue(pos) => write!(
556 f,
557 r#"position {}: `=` must be followed by an attribute value"#,
558 pos
559 ),
560 Self::UnquotedValue(pos) => write!(
561 f,
562 r#"position {}: attribute value must be enclosed in `"` or `'`"#,
563 pos
564 ),
565 Self::ExpectedQuote(pos, quote) => write!(
566 f,
567 r#"position {}: missing closing quote `{}` in attribute value"#,
568 pos, *quote as char
569 ),
570 Self::Duplicated(pos1, pos2) => write!(
571 f,
572 r#"position {}: duplicated attribute, previous declaration at position {}"#,
573 pos1, pos2
574 ),
575 }
576 }
577}
578
579impl std::error::Error for AttrError {}
580
581////////////////////////////////////////////////////////////////////////////////////////////////////
582
/// A key/value XML or HTML [attribute], tagged with the syntax it was written in.
///
/// [attribute]: https://www.w3.org/TR/xml11/#NT-Attribute
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Attr<T> {
    /// Attribute with a value enclosed in double quotes (`"`). Holds the key
    /// and the value. This is the canonical XML-style attribute.
    DoubleQ(T, T),
    /// Attribute with a value enclosed in single quotes (`'`). Holds the key
    /// and the value. This is an XML-style attribute.
    SingleQ(T, T),
    /// Attribute with a value not enclosed in quotes. Holds the key and the
    /// value. This is an HTML-style attribute, it can be returned in HTML-mode
    /// parsing only. In XML mode [`AttrError::UnquotedValue`] will be raised
    /// instead.
    ///
    /// The attribute value can be invalid according to the [HTML specification],
    /// in particular, it can contain `"`, `'`, `=`, `<`, and <code>`</code>
    /// characters. The absence of the `>` character is nevertheless guaranteed,
    /// since the parser extracts [events] based on them even before the start
    /// of parsing attributes.
    ///
    /// [HTML specification]: https://html.spec.whatwg.org/#unquoted
    /// [events]: crate::events::Event::Start
    Unquoted(T, T),
    /// Attribute without a value. Holds the key only. This is an HTML-style
    /// attribute, it can be returned in HTML-mode parsing only. In XML mode
    /// [`AttrError::ExpectedEq`] will be raised instead.
    Empty(T),
}
613
614impl<T> Attr<T> {
615 /// Maps an `Attr<T>` to `Attr<U>` by applying a function to a contained key and value.
616 #[inline]
617 pub fn map<U, F>(self, mut f: F) -> Attr<U>
618 where
619 F: FnMut(T) -> U,
620 {
621 match self {
622 Attr::DoubleQ(key, value) => Attr::DoubleQ(f(key), f(value)),
623 Attr::SingleQ(key, value) => Attr::SingleQ(f(key), f(value)),
624 Attr::Empty(key) => Attr::Empty(f(key)),
625 Attr::Unquoted(key, value) => Attr::Unquoted(f(key), f(value)),
626 }
627 }
628}
629
630impl<'a> Attr<&'a [u8]> {
631 /// Returns the key value
632 #[inline]
633 pub const fn key(&self) -> QName<'a> {
634 QName(match self {
635 Attr::DoubleQ(key, _) => key,
636 Attr::SingleQ(key, _) => key,
637 Attr::Empty(key) => key,
638 Attr::Unquoted(key, _) => key,
639 })
640 }
641 /// Returns the attribute value. For [`Self::Empty`] variant an empty slice
642 /// is returned according to the [HTML specification].
643 ///
644 /// [HTML specification]: https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#syntax-attr-empty
645 #[inline]
646 pub const fn value(&self) -> &'a [u8] {
647 match self {
648 Attr::DoubleQ(_, value) => value,
649 Attr::SingleQ(_, value) => value,
650 Attr::Empty(_) => &[],
651 Attr::Unquoted(_, value) => value,
652 }
653 }
654}
655
656impl<T: AsRef<[u8]>> Debug for Attr<T> {
657 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
658 match self {
659 Attr::DoubleQ(key, value) => f
660 .debug_tuple("Attr::DoubleQ")
661 .field(&Bytes(key.as_ref()))
662 .field(&Bytes(value.as_ref()))
663 .finish(),
664 Attr::SingleQ(key, value) => f
665 .debug_tuple("Attr::SingleQ")
666 .field(&Bytes(key.as_ref()))
667 .field(&Bytes(value.as_ref()))
668 .finish(),
669 Attr::Empty(key) => f
670 .debug_tuple("Attr::Empty")
671 // Comment to prevent formatting and keep style consistent
672 .field(&Bytes(key.as_ref()))
673 .finish(),
674 Attr::Unquoted(key, value) => f
675 .debug_tuple("Attr::Unquoted")
676 .field(&Bytes(key.as_ref()))
677 .field(&Bytes(value.as_ref()))
678 .finish(),
679 }
680 }
681}
682
683/// Unpacks attribute key and value into tuple of this two elements.
684/// `None` value element is returned only for [`Attr::Empty`] variant.
685impl<T> From<Attr<T>> for (T, Option<T>) {
686 #[inline]
687 fn from(attr: Attr<T>) -> Self {
688 match attr {
689 Attr::DoubleQ(key, value) => (key, Some(value)),
690 Attr::SingleQ(key, value) => (key, Some(value)),
691 Attr::Empty(key) => (key, None),
692 Attr::Unquoted(key, value) => (key, Some(value)),
693 }
694 }
695}
696
697////////////////////////////////////////////////////////////////////////////////////////////////////
698
699type AttrResult = Result<Attr<Range<usize>>, AttrError>;
700
701#[derive(Clone, Copy, Debug)]
702enum State {
703 /// Iteration finished, iterator will return `None` to all [`IterState::next`]
704 /// requests.
705 Done,
706 /// The last attribute returned was deserialized successfully. Contains an
707 /// offset from which next attribute should be searched.
708 Next(usize),
709 /// The last attribute returns [`AttrError::UnquotedValue`], offset pointed
710 /// to the beginning of the value. Recover should skip a value
711 SkipValue(usize),
712 /// The last attribute returns [`AttrError::Duplicated`], offset pointed to
713 /// the equal (`=`) sign. Recover should skip it and a value
714 SkipEqValue(usize),
715}
716
717/// External iterator over spans of attribute key and value
718#[derive(Clone, Debug)]
719pub(crate) struct IterState {
720 /// Iteration state that determines what actions should be done before the
721 /// actual parsing of the next attribute
722 state: State,
723 /// If `true`, enables ability to parse unquoted values and key-only (empty)
724 /// attributes
725 html: bool,
726 /// If `true`, checks for duplicate names
727 check_duplicates: bool,
728 /// If `check_duplicates` is set, contains the ranges of already parsed attribute
729 /// names. We store a ranges instead of slices to able to report a previous
730 /// attribute position
731 keys: Vec<Range<usize>>,
732}
733
734impl IterState {
735 pub const fn new(offset: usize, html: bool) -> Self {
736 Self {
737 state: State::Next(offset),
738 html,
739 check_duplicates: true,
740 keys: Vec::new(),
741 }
742 }
743
744 /// Recover from an error that could have been made on a previous step.
745 /// Returns an offset from which parsing should continue.
746 /// If there no input left, returns `None`.
747 fn recover(&self, slice: &[u8]) -> Option<usize> {
748 match self.state {
749 State::Done => None,
750 State::Next(offset) => Some(offset),
751 State::SkipValue(offset) => self.skip_value(slice, offset),
752 State::SkipEqValue(offset) => self.skip_eq_value(slice, offset),
753 }
754 }
755
756 /// Skip all characters up to first space symbol or end-of-input
757 #[inline]
758 #[allow(clippy::manual_map)]
759 fn skip_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
760 let mut iter = (offset..).zip(slice[offset..].iter());
761
762 match iter.find(|(_, &b)| is_whitespace(b)) {
763 // Input: ` key = value `
764 // | ^
765 // offset e
766 Some((e, _)) => Some(e),
767 // Input: ` key = value`
768 // | ^
769 // offset e = len()
770 None => None,
771 }
772 }
773
774 /// Skip all characters up to first space symbol or end-of-input
775 #[inline]
776 fn skip_eq_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
777 let mut iter = (offset..).zip(slice[offset..].iter());
778
779 // Skip all up to the quote and get the quote type
780 let quote = match iter.find(|(_, &b)| !is_whitespace(b)) {
781 // Input: ` key = "`
782 // | ^
783 // offset
784 Some((_, b'"')) => b'"',
785 // Input: ` key = '`
786 // | ^
787 // offset
788 Some((_, b'\'')) => b'\'',
789
790 // Input: ` key = x`
791 // | ^
792 // offset
793 Some((offset, _)) => return self.skip_value(slice, offset),
794 // Input: ` key = `
795 // | ^
796 // offset
797 None => return None,
798 };
799
800 match iter.find(|(_, &b)| b == quote) {
801 // Input: ` key = " "`
802 // ^
803 Some((e, b'"')) => Some(e),
804 // Input: ` key = ' '`
805 // ^
806 Some((e, _)) => Some(e),
807
808 // Input: ` key = " `
809 // Input: ` key = ' `
810 // ^
811 // Closing quote not found
812 None => None,
813 }
814 }
815
816 #[inline]
817 fn check_for_duplicates(
818 &mut self,
819 slice: &[u8],
820 key: Range<usize>,
821 ) -> Result<Range<usize>, AttrError> {
822 if self.check_duplicates {
823 if let Some(prev) = self
824 .keys
825 .iter()
826 .find(|r| slice[(*r).clone()] == slice[key.clone()])
827 {
828 return Err(AttrError::Duplicated(key.start, prev.start));
829 }
830 self.keys.push(key.clone());
831 }
832 Ok(key)
833 }
834
835 /// # Parameters
836 ///
837 /// - `slice`: content of the tag, used for checking for duplicates
838 /// - `key`: Range of key in slice, if iterator in HTML mode
839 /// - `offset`: Position of error if iterator in XML mode
840 #[inline]
841 fn key_only(&mut self, slice: &[u8], key: Range<usize>, offset: usize) -> Option<AttrResult> {
842 Some(if self.html {
843 self.check_for_duplicates(slice, key).map(Attr::Empty)
844 } else {
845 Err(AttrError::ExpectedEq(offset))
846 })
847 }
848
849 #[inline]
850 fn double_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
851 self.state = State::Next(value.end + 1); // +1 for `"`
852
853 Some(Ok(Attr::DoubleQ(key, value)))
854 }
855
856 #[inline]
857 fn single_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
858 self.state = State::Next(value.end + 1); // +1 for `'`
859
860 Some(Ok(Attr::SingleQ(key, value)))
861 }
862
863 pub fn next(&mut self, slice: &[u8]) -> Option<AttrResult> {
864 let mut iter = match self.recover(slice) {
865 Some(offset) => (offset..).zip(slice[offset..].iter()),
866 None => return None,
867 };
868
869 // Index where next key started
870 let start_key = match iter.find(|(_, &b)| !is_whitespace(b)) {
871 // Input: ` key`
872 // ^
873 Some((s, _)) => s,
874 // Input: ` `
875 // ^
876 None => {
877 // Because we reach end-of-input, stop iteration on next call
878 self.state = State::Done;
879 return None;
880 }
881 };
882 // Span of a key
883 let (key, offset) = match iter.find(|(_, &b)| b == b'=' || is_whitespace(b)) {
884 // Input: ` key=`
885 // | ^
886 // s e
887 Some((e, b'=')) => (start_key..e, e),
888
889 // Input: ` key `
890 // ^
891 Some((e, _)) => match iter.find(|(_, &b)| !is_whitespace(b)) {
892 // Input: ` key =`
893 // | | ^
894 // start_key e
895 Some((offset, b'=')) => (start_key..e, offset),
896 // Input: ` key x`
897 // | | ^
898 // start_key e
899 // If HTML-like attributes is allowed, this is the result, otherwise error
900 Some((offset, _)) => {
901 // In any case, recovering is not required
902 self.state = State::Next(offset);
903 return self.key_only(slice, start_key..e, offset);
904 }
905 // Input: ` key `
906 // | | ^
907 // start_key e
908 // If HTML-like attributes is allowed, this is the result, otherwise error
909 None => {
910 // Because we reach end-of-input, stop iteration on next call
911 self.state = State::Done;
912 return self.key_only(slice, start_key..e, slice.len());
913 }
914 },
915
916 // Input: ` key`
917 // | ^
918 // s e = len()
919 // If HTML-like attributes is allowed, this is the result, otherwise error
920 None => {
921 // Because we reach end-of-input, stop iteration on next call
922 self.state = State::Done;
923 let e = slice.len();
924 return self.key_only(slice, start_key..e, e);
925 }
926 };
927
928 let key = match self.check_for_duplicates(slice, key) {
929 Err(e) => {
930 self.state = State::SkipEqValue(offset);
931 return Some(Err(e));
932 }
933 Ok(key) => key,
934 };
935
936 ////////////////////////////////////////////////////////////////////////
937
938 // Gets the position of quote and quote type
939 let (start_value, quote) = match iter.find(|(_, &b)| !is_whitespace(b)) {
940 // Input: ` key = "`
941 // ^
942 Some((s, b'"')) => (s + 1, b'"'),
943 // Input: ` key = '`
944 // ^
945 Some((s, b'\'')) => (s + 1, b'\''),
946
947 // Input: ` key = x`
948 // ^
949 // If HTML-like attributes is allowed, this is the start of the value
950 Some((s, _)) if self.html => {
951 // We do not check validity of attribute value characters as required
952 // according to https://html.spec.whatwg.org/#unquoted. It can be done
953 // during validation phase
954 let end = match iter.find(|(_, &b)| is_whitespace(b)) {
955 // Input: ` key = value `
956 // | ^
957 // s e
958 Some((e, _)) => e,
959 // Input: ` key = value`
960 // | ^
961 // s e = len()
962 None => slice.len(),
963 };
964 self.state = State::Next(end);
965 return Some(Ok(Attr::Unquoted(key, s..end)));
966 }
967 // Input: ` key = x`
968 // ^
969 Some((s, _)) => {
970 self.state = State::SkipValue(s);
971 return Some(Err(AttrError::UnquotedValue(s)));
972 }
973
974 // Input: ` key = `
975 // ^
976 None => {
977 // Because we reach end-of-input, stop iteration on next call
978 self.state = State::Done;
979 return Some(Err(AttrError::ExpectedValue(slice.len())));
980 }
981 };
982
983 match iter.find(|(_, &b)| b == quote) {
984 // Input: ` key = " "`
985 // ^
986 Some((e, b'"')) => self.double_q(key, start_value..e),
987 // Input: ` key = ' '`
988 // ^
989 Some((e, _)) => self.single_q(key, start_value..e),
990
991 // Input: ` key = " `
992 // Input: ` key = ' `
993 // ^
994 // Closing quote not found
995 None => {
996 // Because we reach end-of-input, stop iteration on next call
997 self.state = State::Done;
998 Some(Err(AttrError::ExpectedQuote(slice.len(), quote)))
999 }
1000 }
1001 }
1002}
1003
1004////////////////////////////////////////////////////////////////////////////////////////////////////
1005
/// Checks how parsing of XML-style attributes works. Each attribute should
/// have a value enclosed in single or double quotes.
1008#[cfg(test)]
1009mod xml {
1010 use super::*;
1011 use pretty_assertions::assert_eq;
1012
1013 /// Checked attribute is the single attribute
1014 mod single {
1015 use super::*;
1016 use pretty_assertions::assert_eq;
1017
1018 /// Attribute have a value enclosed in single quotes
1019 #[test]
1020 fn single_quoted() {
1021 let mut iter = Attributes::new(r#"tag key='value'"#, 3);
1022
1023 assert_eq!(
1024 iter.next(),
1025 Some(Ok(Attribute {
1026 key: QName(b"key"),
1027 value: Cow::Borrowed(b"value"),
1028 }))
1029 );
1030 assert_eq!(iter.next(), None);
1031 assert_eq!(iter.next(), None);
1032 }
1033
1034 /// Attribute have a value enclosed in double quotes
1035 #[test]
1036 fn double_quoted() {
1037 let mut iter = Attributes::new(r#"tag key="value""#, 3);
1038
1039 assert_eq!(
1040 iter.next(),
1041 Some(Ok(Attribute {
1042 key: QName(b"key"),
1043 value: Cow::Borrowed(b"value"),
1044 }))
1045 );
1046 assert_eq!(iter.next(), None);
1047 assert_eq!(iter.next(), None);
1048 }
1049
1050 /// Attribute have a value, not enclosed in quotes
1051 #[test]
1052 fn unquoted() {
1053 let mut iter = Attributes::new(r#"tag key=value"#, 3);
1054 // 0 ^ = 8
1055
1056 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
1057 assert_eq!(iter.next(), None);
1058 assert_eq!(iter.next(), None);
1059 }
1060
1061 /// Only attribute key is present
1062 #[test]
1063 fn key_only() {
1064 let mut iter = Attributes::new(r#"tag key"#, 3);
1065 // 0 ^ = 7
1066
1067 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(7))));
1068 assert_eq!(iter.next(), None);
1069 assert_eq!(iter.next(), None);
1070 }
1071
1072 /// Key is started with an invalid symbol (a single quote in this test).
1073 /// Because we do not check validity of keys and values during parsing,
1074 /// that invalid attribute will be returned
1075 #[test]
1076 fn key_start_invalid() {
1077 let mut iter = Attributes::new(r#"tag 'key'='value'"#, 3);
1078
1079 assert_eq!(
1080 iter.next(),
1081 Some(Ok(Attribute {
1082 key: QName(b"'key'"),
1083 value: Cow::Borrowed(b"value"),
1084 }))
1085 );
1086 assert_eq!(iter.next(), None);
1087 assert_eq!(iter.next(), None);
1088 }
1089
1090 /// Key contains an invalid symbol (an ampersand in this test).
1091 /// Because we do not check validity of keys and values during parsing,
1092 /// that invalid attribute will be returned
1093 #[test]
1094 fn key_contains_invalid() {
1095 let mut iter = Attributes::new(r#"tag key&jey='value'"#, 3);
1096
1097 assert_eq!(
1098 iter.next(),
1099 Some(Ok(Attribute {
1100 key: QName(b"key&jey"),
1101 value: Cow::Borrowed(b"value"),
1102 }))
1103 );
1104 assert_eq!(iter.next(), None);
1105 assert_eq!(iter.next(), None);
1106 }
1107
1108 /// Attribute value is missing after `=`
1109 #[test]
1110 fn missed_value() {
1111 let mut iter = Attributes::new(r#"tag key="#, 3);
1112 // 0 ^ = 8
1113
1114 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8))));
1115 assert_eq!(iter.next(), None);
1116 assert_eq!(iter.next(), None);
1117 }
1118 }
1119
1120 /// Checked attribute is the first attribute in the list of many attributes
1121 mod first {
1122 use super::*;
1123 use pretty_assertions::assert_eq;
1124
1125 /// Attribute have a value enclosed in single quotes
1126 #[test]
1127 fn single_quoted() {
1128 let mut iter = Attributes::new(r#"tag key='value' regular='attribute'"#, 3);
1129
1130 assert_eq!(
1131 iter.next(),
1132 Some(Ok(Attribute {
1133 key: QName(b"key"),
1134 value: Cow::Borrowed(b"value"),
1135 }))
1136 );
1137 assert_eq!(
1138 iter.next(),
1139 Some(Ok(Attribute {
1140 key: QName(b"regular"),
1141 value: Cow::Borrowed(b"attribute"),
1142 }))
1143 );
1144 assert_eq!(iter.next(), None);
1145 assert_eq!(iter.next(), None);
1146 }
1147
1148 /// Attribute have a value enclosed in double quotes
1149 #[test]
1150 fn double_quoted() {
1151 let mut iter = Attributes::new(r#"tag key="value" regular='attribute'"#, 3);
1152
1153 assert_eq!(
1154 iter.next(),
1155 Some(Ok(Attribute {
1156 key: QName(b"key"),
1157 value: Cow::Borrowed(b"value"),
1158 }))
1159 );
1160 assert_eq!(
1161 iter.next(),
1162 Some(Ok(Attribute {
1163 key: QName(b"regular"),
1164 value: Cow::Borrowed(b"attribute"),
1165 }))
1166 );
1167 assert_eq!(iter.next(), None);
1168 assert_eq!(iter.next(), None);
1169 }
1170
1171 /// Attribute have a value, not enclosed in quotes
1172 #[test]
1173 fn unquoted() {
1174 let mut iter = Attributes::new(r#"tag key=value regular='attribute'"#, 3);
1175 // 0 ^ = 8
1176
1177 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
1178 // check error recovery
1179 assert_eq!(
1180 iter.next(),
1181 Some(Ok(Attribute {
1182 key: QName(b"regular"),
1183 value: Cow::Borrowed(b"attribute"),
1184 }))
1185 );
1186 assert_eq!(iter.next(), None);
1187 assert_eq!(iter.next(), None);
1188 }
1189
1190 /// Only attribute key is present
1191 #[test]
1192 fn key_only() {
1193 let mut iter = Attributes::new(r#"tag key regular='attribute'"#, 3);
1194 // 0 ^ = 8
1195
1196 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
1197 // check error recovery
1198 assert_eq!(
1199 iter.next(),
1200 Some(Ok(Attribute {
1201 key: QName(b"regular"),
1202 value: Cow::Borrowed(b"attribute"),
1203 }))
1204 );
1205 assert_eq!(iter.next(), None);
1206 assert_eq!(iter.next(), None);
1207 }
1208
1209 /// Key is started with an invalid symbol (a single quote in this test).
1210 /// Because we do not check validity of keys and values during parsing,
1211 /// that invalid attribute will be returned
1212 #[test]
1213 fn key_start_invalid() {
1214 let mut iter = Attributes::new(r#"tag 'key'='value' regular='attribute'"#, 3);
1215
1216 assert_eq!(
1217 iter.next(),
1218 Some(Ok(Attribute {
1219 key: QName(b"'key'"),
1220 value: Cow::Borrowed(b"value"),
1221 }))
1222 );
1223 assert_eq!(
1224 iter.next(),
1225 Some(Ok(Attribute {
1226 key: QName(b"regular"),
1227 value: Cow::Borrowed(b"attribute"),
1228 }))
1229 );
1230 assert_eq!(iter.next(), None);
1231 assert_eq!(iter.next(), None);
1232 }
1233
1234 /// Key contains an invalid symbol (an ampersand in this test).
1235 /// Because we do not check validity of keys and values during parsing,
1236 /// that invalid attribute will be returned
1237 #[test]
1238 fn key_contains_invalid() {
1239 let mut iter = Attributes::new(r#"tag key&jey='value' regular='attribute'"#, 3);
1240
1241 assert_eq!(
1242 iter.next(),
1243 Some(Ok(Attribute {
1244 key: QName(b"key&jey"),
1245 value: Cow::Borrowed(b"value"),
1246 }))
1247 );
1248 assert_eq!(
1249 iter.next(),
1250 Some(Ok(Attribute {
1251 key: QName(b"regular"),
1252 value: Cow::Borrowed(b"attribute"),
1253 }))
1254 );
1255 assert_eq!(iter.next(), None);
1256 assert_eq!(iter.next(), None);
1257 }
1258
1259 /// Attribute value is missing after `=`.
1260 #[test]
1261 fn missed_value() {
1262 let mut iter = Attributes::new(r#"tag key= regular='attribute'"#, 3);
1263 // 0 ^ = 9
1264
1265 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1266 // Because we do not check validity of keys and values during parsing,
1267 // "error='recovery'" is considered, as unquoted attribute value and
1268 // skipped during recovery and iteration finished
1269 assert_eq!(iter.next(), None);
1270 assert_eq!(iter.next(), None);
1271
1272 ////////////////////////////////////////////////////////////////////
1273
1274 let mut iter = Attributes::new(r#"tag key= regular= 'attribute'"#, 3);
1275 // 0 ^ = 9 ^ = 29
1276
1277 // In that case "regular=" considered as unquoted value
1278 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1279 // In that case "'attribute'" considered as a key, because we do not check
1280 // validity of key names
1281 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
1282 assert_eq!(iter.next(), None);
1283 assert_eq!(iter.next(), None);
1284
1285 ////////////////////////////////////////////////////////////////////
1286
1287 let mut iter = Attributes::new(r#"tag key= regular ='attribute'"#, 3);
1288 // 0 ^ = 9 ^ = 29
1289
1290 // In that case "regular" considered as unquoted value
1291 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1292 // In that case "='attribute'" considered as a key, because we do not check
1293 // validity of key names
1294 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
1295 assert_eq!(iter.next(), None);
1296 assert_eq!(iter.next(), None);
1297
1298 ////////////////////////////////////////////////////////////////////
1299
1300 let mut iter = Attributes::new(r#"tag key= regular = 'attribute'"#, 3);
1301 // 0 ^ = 9 ^ = 19 ^ = 30
1302
1303 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1304 // In that case second "=" considered as a key, because we do not check
1305 // validity of key names
1306 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(19))));
1307 // In that case "'attribute'" considered as a key, because we do not check
1308 // validity of key names
1309 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(30))));
1310 assert_eq!(iter.next(), None);
1311 assert_eq!(iter.next(), None);
1312 }
1313 }
1314
1315 /// Copy of single, but with additional spaces in markup
1316 mod sparsed {
1317 use super::*;
1318 use pretty_assertions::assert_eq;
1319
1320 /// Attribute have a value enclosed in single quotes
1321 #[test]
1322 fn single_quoted() {
1323 let mut iter = Attributes::new(r#"tag key = 'value' "#, 3);
1324
1325 assert_eq!(
1326 iter.next(),
1327 Some(Ok(Attribute {
1328 key: QName(b"key"),
1329 value: Cow::Borrowed(b"value"),
1330 }))
1331 );
1332 assert_eq!(iter.next(), None);
1333 assert_eq!(iter.next(), None);
1334 }
1335
1336 /// Attribute have a value enclosed in double quotes
1337 #[test]
1338 fn double_quoted() {
1339 let mut iter = Attributes::new(r#"tag key = "value" "#, 3);
1340
1341 assert_eq!(
1342 iter.next(),
1343 Some(Ok(Attribute {
1344 key: QName(b"key"),
1345 value: Cow::Borrowed(b"value"),
1346 }))
1347 );
1348 assert_eq!(iter.next(), None);
1349 assert_eq!(iter.next(), None);
1350 }
1351
1352 /// Attribute have a value, not enclosed in quotes
1353 #[test]
1354 fn unquoted() {
1355 let mut iter = Attributes::new(r#"tag key = value "#, 3);
1356 // 0 ^ = 10
1357
1358 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(10))));
1359 assert_eq!(iter.next(), None);
1360 assert_eq!(iter.next(), None);
1361 }
1362
1363 /// Only attribute key is present
1364 #[test]
1365 fn key_only() {
1366 let mut iter = Attributes::new(r#"tag key "#, 3);
1367 // 0 ^ = 8
1368
1369 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
1370 assert_eq!(iter.next(), None);
1371 assert_eq!(iter.next(), None);
1372 }
1373
1374 /// Key is started with an invalid symbol (a single quote in this test).
1375 /// Because we do not check validity of keys and values during parsing,
1376 /// that invalid attribute will be returned
1377 #[test]
1378 fn key_start_invalid() {
1379 let mut iter = Attributes::new(r#"tag 'key' = 'value' "#, 3);
1380
1381 assert_eq!(
1382 iter.next(),
1383 Some(Ok(Attribute {
1384 key: QName(b"'key'"),
1385 value: Cow::Borrowed(b"value"),
1386 }))
1387 );
1388 assert_eq!(iter.next(), None);
1389 assert_eq!(iter.next(), None);
1390 }
1391
1392 /// Key contains an invalid symbol (an ampersand in this test).
1393 /// Because we do not check validity of keys and values during parsing,
1394 /// that invalid attribute will be returned
1395 #[test]
1396 fn key_contains_invalid() {
1397 let mut iter = Attributes::new(r#"tag key&jey = 'value' "#, 3);
1398
1399 assert_eq!(
1400 iter.next(),
1401 Some(Ok(Attribute {
1402 key: QName(b"key&jey"),
1403 value: Cow::Borrowed(b"value"),
1404 }))
1405 );
1406 assert_eq!(iter.next(), None);
1407 assert_eq!(iter.next(), None);
1408 }
1409
1410 /// Attribute value is missing after `=`
1411 #[test]
1412 fn missed_value() {
1413 let mut iter = Attributes::new(r#"tag key = "#, 3);
1414 // 0 ^ = 10
1415
1416 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10))));
1417 assert_eq!(iter.next(), None);
1418 assert_eq!(iter.next(), None);
1419 }
1420 }
1421
1422 /// Checks that duplicated attributes correctly reported and recovering is
1423 /// possible after that
1424 mod duplicated {
1425 use super::*;
1426
1427 mod with_check {
1428 use super::*;
1429 use pretty_assertions::assert_eq;
1430
1431 /// Attribute have a value enclosed in single quotes
1432 #[test]
1433 fn single_quoted() {
1434 let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
1435 // 0 ^ = 4 ^ = 16
1436
1437 assert_eq!(
1438 iter.next(),
1439 Some(Ok(Attribute {
1440 key: QName(b"key"),
1441 value: Cow::Borrowed(b"value"),
1442 }))
1443 );
1444 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
1445 assert_eq!(
1446 iter.next(),
1447 Some(Ok(Attribute {
1448 key: QName(b"another"),
1449 value: Cow::Borrowed(b""),
1450 }))
1451 );
1452 assert_eq!(iter.next(), None);
1453 assert_eq!(iter.next(), None);
1454 }
1455
1456 /// Attribute have a value enclosed in double quotes
1457 #[test]
1458 fn double_quoted() {
1459 let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
1460 // 0 ^ = 4 ^ = 16
1461
1462 assert_eq!(
1463 iter.next(),
1464 Some(Ok(Attribute {
1465 key: QName(b"key"),
1466 value: Cow::Borrowed(b"value"),
1467 }))
1468 );
1469 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
1470 assert_eq!(
1471 iter.next(),
1472 Some(Ok(Attribute {
1473 key: QName(b"another"),
1474 value: Cow::Borrowed(b""),
1475 }))
1476 );
1477 assert_eq!(iter.next(), None);
1478 assert_eq!(iter.next(), None);
1479 }
1480
1481 /// Attribute have a value, not enclosed in quotes
1482 #[test]
1483 fn unquoted() {
1484 let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
1485 // 0 ^ = 4 ^ = 16
1486
1487 assert_eq!(
1488 iter.next(),
1489 Some(Ok(Attribute {
1490 key: QName(b"key"),
1491 value: Cow::Borrowed(b"value"),
1492 }))
1493 );
1494 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
1495 assert_eq!(
1496 iter.next(),
1497 Some(Ok(Attribute {
1498 key: QName(b"another"),
1499 value: Cow::Borrowed(b""),
1500 }))
1501 );
1502 assert_eq!(iter.next(), None);
1503 assert_eq!(iter.next(), None);
1504 }
1505
1506 /// Only attribute key is present
1507 #[test]
1508 fn key_only() {
1509 let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
1510 // 0 ^ = 20
1511
1512 assert_eq!(
1513 iter.next(),
1514 Some(Ok(Attribute {
1515 key: QName(b"key"),
1516 value: Cow::Borrowed(b"value"),
1517 }))
1518 );
1519 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
1520 assert_eq!(
1521 iter.next(),
1522 Some(Ok(Attribute {
1523 key: QName(b"another"),
1524 value: Cow::Borrowed(b""),
1525 }))
1526 );
1527 assert_eq!(iter.next(), None);
1528 assert_eq!(iter.next(), None);
1529 }
1530 }
1531
1532 /// Check for duplicated names is disabled
1533 mod without_check {
1534 use super::*;
1535 use pretty_assertions::assert_eq;
1536
1537 /// Attribute have a value enclosed in single quotes
1538 #[test]
1539 fn single_quoted() {
1540 let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
1541 iter.with_checks(false);
1542
1543 assert_eq!(
1544 iter.next(),
1545 Some(Ok(Attribute {
1546 key: QName(b"key"),
1547 value: Cow::Borrowed(b"value"),
1548 }))
1549 );
1550 assert_eq!(
1551 iter.next(),
1552 Some(Ok(Attribute {
1553 key: QName(b"key"),
1554 value: Cow::Borrowed(b"dup"),
1555 }))
1556 );
1557 assert_eq!(
1558 iter.next(),
1559 Some(Ok(Attribute {
1560 key: QName(b"another"),
1561 value: Cow::Borrowed(b""),
1562 }))
1563 );
1564 assert_eq!(iter.next(), None);
1565 assert_eq!(iter.next(), None);
1566 }
1567
1568 /// Attribute have a value enclosed in double quotes
1569 #[test]
1570 fn double_quoted() {
1571 let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
1572 iter.with_checks(false);
1573
1574 assert_eq!(
1575 iter.next(),
1576 Some(Ok(Attribute {
1577 key: QName(b"key"),
1578 value: Cow::Borrowed(b"value"),
1579 }))
1580 );
1581 assert_eq!(
1582 iter.next(),
1583 Some(Ok(Attribute {
1584 key: QName(b"key"),
1585 value: Cow::Borrowed(b"dup"),
1586 }))
1587 );
1588 assert_eq!(
1589 iter.next(),
1590 Some(Ok(Attribute {
1591 key: QName(b"another"),
1592 value: Cow::Borrowed(b""),
1593 }))
1594 );
1595 assert_eq!(iter.next(), None);
1596 assert_eq!(iter.next(), None);
1597 }
1598
1599 /// Attribute have a value, not enclosed in quotes
1600 #[test]
1601 fn unquoted() {
1602 let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
1603 // 0 ^ = 20
1604 iter.with_checks(false);
1605
1606 assert_eq!(
1607 iter.next(),
1608 Some(Ok(Attribute {
1609 key: QName(b"key"),
1610 value: Cow::Borrowed(b"value"),
1611 }))
1612 );
1613 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(20))));
1614 assert_eq!(
1615 iter.next(),
1616 Some(Ok(Attribute {
1617 key: QName(b"another"),
1618 value: Cow::Borrowed(b""),
1619 }))
1620 );
1621 assert_eq!(iter.next(), None);
1622 assert_eq!(iter.next(), None);
1623 }
1624
1625 /// Only attribute key is present
1626 #[test]
1627 fn key_only() {
1628 let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
1629 // 0 ^ = 20
1630 iter.with_checks(false);
1631
1632 assert_eq!(
1633 iter.next(),
1634 Some(Ok(Attribute {
1635 key: QName(b"key"),
1636 value: Cow::Borrowed(b"value"),
1637 }))
1638 );
1639 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
1640 assert_eq!(
1641 iter.next(),
1642 Some(Ok(Attribute {
1643 key: QName(b"another"),
1644 value: Cow::Borrowed(b""),
1645 }))
1646 );
1647 assert_eq!(iter.next(), None);
1648 assert_eq!(iter.next(), None);
1649 }
1650 }
1651 }
1652
/// Values may contain the quote character of the other kind: a double quote
/// inside a single-quoted value and a single quote inside a double-quoted one
#[test]
fn mixed_quote() {
    /// Builds the item that the iterator is expected to yield for a
    /// successfully parsed attribute with the given raw key and value
    fn parsed<'a>(key: &'a [u8], value: &'a [u8]) -> Option<Result<Attribute<'a>, AttrError>> {
        Some(Ok(Attribute {
            key: QName(key),
            value: Cow::Borrowed(value),
        }))
    }

    let mut attrs = Attributes::new(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);

    assert_eq!(attrs.next(), parsed(b"a", b"a"));
    assert_eq!(attrs.next(), parsed(b"b", b"b"));
    // The quote of the other kind is kept as a part of the value
    assert_eq!(attrs.next(), parsed(b"c", br#"cc"cc"#));
    assert_eq!(attrs.next(), parsed(b"d", b"dd'dd"));
    // Once exhausted, the iterator keeps returning `None`
    assert_eq!(attrs.next(), None);
    assert_eq!(attrs.next(), None);
}
1688}
1689
/// Checks how parsing of HTML-style attributes works. Each attribute can be
/// in one of three forms:
/// - XML-like: has a value enclosed in single or double quotes
/// - has a value that is not enclosed in quotes
/// - without a value, key only
1695#[cfg(test)]
1696mod html {
1697 use super::*;
1698 use pretty_assertions::assert_eq;
1699
1700 /// Checked attribute is the single attribute
1701 mod single {
1702 use super::*;
1703 use pretty_assertions::assert_eq;
1704
1705 /// Attribute have a value enclosed in single quotes
1706 #[test]
1707 fn single_quoted() {
1708 let mut iter = Attributes::html(r#"tag key='value'"#, 3);
1709
1710 assert_eq!(
1711 iter.next(),
1712 Some(Ok(Attribute {
1713 key: QName(b"key"),
1714 value: Cow::Borrowed(b"value"),
1715 }))
1716 );
1717 assert_eq!(iter.next(), None);
1718 assert_eq!(iter.next(), None);
1719 }
1720
1721 /// Attribute have a value enclosed in double quotes
1722 #[test]
1723 fn double_quoted() {
1724 let mut iter = Attributes::html(r#"tag key="value""#, 3);
1725
1726 assert_eq!(
1727 iter.next(),
1728 Some(Ok(Attribute {
1729 key: QName(b"key"),
1730 value: Cow::Borrowed(b"value"),
1731 }))
1732 );
1733 assert_eq!(iter.next(), None);
1734 assert_eq!(iter.next(), None);
1735 }
1736
1737 /// Attribute have a value, not enclosed in quotes
1738 #[test]
1739 fn unquoted() {
1740 let mut iter = Attributes::html(r#"tag key=value"#, 3);
1741
1742 assert_eq!(
1743 iter.next(),
1744 Some(Ok(Attribute {
1745 key: QName(b"key"),
1746 value: Cow::Borrowed(b"value"),
1747 }))
1748 );
1749 assert_eq!(iter.next(), None);
1750 assert_eq!(iter.next(), None);
1751 }
1752
1753 /// Only attribute key is present
1754 #[test]
1755 fn key_only() {
1756 let mut iter = Attributes::html(r#"tag key"#, 3);
1757
1758 assert_eq!(
1759 iter.next(),
1760 Some(Ok(Attribute {
1761 key: QName(b"key"),
1762 value: Cow::Borrowed(&[]),
1763 }))
1764 );
1765 assert_eq!(iter.next(), None);
1766 assert_eq!(iter.next(), None);
1767 }
1768
1769 /// Key is started with an invalid symbol (a single quote in this test).
1770 /// Because we do not check validity of keys and values during parsing,
1771 /// that invalid attribute will be returned
1772 #[test]
1773 fn key_start_invalid() {
1774 let mut iter = Attributes::html(r#"tag 'key'='value'"#, 3);
1775
1776 assert_eq!(
1777 iter.next(),
1778 Some(Ok(Attribute {
1779 key: QName(b"'key'"),
1780 value: Cow::Borrowed(b"value"),
1781 }))
1782 );
1783 assert_eq!(iter.next(), None);
1784 assert_eq!(iter.next(), None);
1785 }
1786
1787 /// Key contains an invalid symbol (an ampersand in this test).
1788 /// Because we do not check validity of keys and values during parsing,
1789 /// that invalid attribute will be returned
1790 #[test]
1791 fn key_contains_invalid() {
1792 let mut iter = Attributes::html(r#"tag key&jey='value'"#, 3);
1793
1794 assert_eq!(
1795 iter.next(),
1796 Some(Ok(Attribute {
1797 key: QName(b"key&jey"),
1798 value: Cow::Borrowed(b"value"),
1799 }))
1800 );
1801 assert_eq!(iter.next(), None);
1802 assert_eq!(iter.next(), None);
1803 }
1804
1805 /// Attribute value is missing after `=`
1806 #[test]
1807 fn missed_value() {
1808 let mut iter = Attributes::html(r#"tag key="#, 3);
1809 // 0 ^ = 8
1810
1811 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8))));
1812 assert_eq!(iter.next(), None);
1813 assert_eq!(iter.next(), None);
1814 }
1815 }
1816
1817 /// Checked attribute is the first attribute in the list of many attributes
1818 mod first {
1819 use super::*;
1820 use pretty_assertions::assert_eq;
1821
1822 /// Attribute have a value enclosed in single quotes
1823 #[test]
1824 fn single_quoted() {
1825 let mut iter = Attributes::html(r#"tag key='value' regular='attribute'"#, 3);
1826
1827 assert_eq!(
1828 iter.next(),
1829 Some(Ok(Attribute {
1830 key: QName(b"key"),
1831 value: Cow::Borrowed(b"value"),
1832 }))
1833 );
1834 assert_eq!(
1835 iter.next(),
1836 Some(Ok(Attribute {
1837 key: QName(b"regular"),
1838 value: Cow::Borrowed(b"attribute"),
1839 }))
1840 );
1841 assert_eq!(iter.next(), None);
1842 assert_eq!(iter.next(), None);
1843 }
1844
1845 /// Attribute have a value enclosed in double quotes
1846 #[test]
1847 fn double_quoted() {
1848 let mut iter = Attributes::html(r#"tag key="value" regular='attribute'"#, 3);
1849
1850 assert_eq!(
1851 iter.next(),
1852 Some(Ok(Attribute {
1853 key: QName(b"key"),
1854 value: Cow::Borrowed(b"value"),
1855 }))
1856 );
1857 assert_eq!(
1858 iter.next(),
1859 Some(Ok(Attribute {
1860 key: QName(b"regular"),
1861 value: Cow::Borrowed(b"attribute"),
1862 }))
1863 );
1864 assert_eq!(iter.next(), None);
1865 assert_eq!(iter.next(), None);
1866 }
1867
1868 /// Attribute have a value, not enclosed in quotes
1869 #[test]
1870 fn unquoted() {
1871 let mut iter = Attributes::html(r#"tag key=value regular='attribute'"#, 3);
1872
1873 assert_eq!(
1874 iter.next(),
1875 Some(Ok(Attribute {
1876 key: QName(b"key"),
1877 value: Cow::Borrowed(b"value"),
1878 }))
1879 );
1880 assert_eq!(
1881 iter.next(),
1882 Some(Ok(Attribute {
1883 key: QName(b"regular"),
1884 value: Cow::Borrowed(b"attribute"),
1885 }))
1886 );
1887 assert_eq!(iter.next(), None);
1888 assert_eq!(iter.next(), None);
1889 }
1890
1891 /// Only attribute key is present
1892 #[test]
1893 fn key_only() {
1894 let mut iter = Attributes::html(r#"tag key regular='attribute'"#, 3);
1895
1896 assert_eq!(
1897 iter.next(),
1898 Some(Ok(Attribute {
1899 key: QName(b"key"),
1900 value: Cow::Borrowed(&[]),
1901 }))
1902 );
1903 assert_eq!(
1904 iter.next(),
1905 Some(Ok(Attribute {
1906 key: QName(b"regular"),
1907 value: Cow::Borrowed(b"attribute"),
1908 }))
1909 );
1910 assert_eq!(iter.next(), None);
1911 assert_eq!(iter.next(), None);
1912 }
1913
1914 /// Key is started with an invalid symbol (a single quote in this test).
1915 /// Because we do not check validity of keys and values during parsing,
1916 /// that invalid attribute will be returned
1917 #[test]
1918 fn key_start_invalid() {
1919 let mut iter = Attributes::html(r#"tag 'key'='value' regular='attribute'"#, 3);
1920
1921 assert_eq!(
1922 iter.next(),
1923 Some(Ok(Attribute {
1924 key: QName(b"'key'"),
1925 value: Cow::Borrowed(b"value"),
1926 }))
1927 );
1928 assert_eq!(
1929 iter.next(),
1930 Some(Ok(Attribute {
1931 key: QName(b"regular"),
1932 value: Cow::Borrowed(b"attribute"),
1933 }))
1934 );
1935 assert_eq!(iter.next(), None);
1936 assert_eq!(iter.next(), None);
1937 }
1938
1939 /// Key contains an invalid symbol (an ampersand in this test).
1940 /// Because we do not check validity of keys and values during parsing,
1941 /// that invalid attribute will be returned
1942 #[test]
1943 fn key_contains_invalid() {
1944 let mut iter = Attributes::html(r#"tag key&jey='value' regular='attribute'"#, 3);
1945
1946 assert_eq!(
1947 iter.next(),
1948 Some(Ok(Attribute {
1949 key: QName(b"key&jey"),
1950 value: Cow::Borrowed(b"value"),
1951 }))
1952 );
1953 assert_eq!(
1954 iter.next(),
1955 Some(Ok(Attribute {
1956 key: QName(b"regular"),
1957 value: Cow::Borrowed(b"attribute"),
1958 }))
1959 );
1960 assert_eq!(iter.next(), None);
1961 assert_eq!(iter.next(), None);
1962 }
1963
1964 /// Attribute value is missing after `=`
1965 #[test]
1966 fn missed_value() {
1967 let mut iter = Attributes::html(r#"tag key= regular='attribute'"#, 3);
1968
1969 // Because we do not check validity of keys and values during parsing,
1970 // "regular='attribute'" is considered as unquoted attribute value
1971 assert_eq!(
1972 iter.next(),
1973 Some(Ok(Attribute {
1974 key: QName(b"key"),
1975 value: Cow::Borrowed(b"regular='attribute'"),
1976 }))
1977 );
1978 assert_eq!(iter.next(), None);
1979 assert_eq!(iter.next(), None);
1980
1981 ////////////////////////////////////////////////////////////////////
1982
1983 let mut iter = Attributes::html(r#"tag key= regular= 'attribute'"#, 3);
1984
1985 // Because we do not check validity of keys and values during parsing,
1986 // "regular=" is considered as unquoted attribute value
1987 assert_eq!(
1988 iter.next(),
1989 Some(Ok(Attribute {
1990 key: QName(b"key"),
1991 value: Cow::Borrowed(b"regular="),
1992 }))
1993 );
1994 // Because we do not check validity of keys and values during parsing,
1995 // "'attribute'" is considered as key-only attribute
1996 assert_eq!(
1997 iter.next(),
1998 Some(Ok(Attribute {
1999 key: QName(b"'attribute'"),
2000 value: Cow::Borrowed(&[]),
2001 }))
2002 );
2003 assert_eq!(iter.next(), None);
2004 assert_eq!(iter.next(), None);
2005
2006 ////////////////////////////////////////////////////////////////////
2007
2008 let mut iter = Attributes::html(r#"tag key= regular ='attribute'"#, 3);
2009
2010 // Because we do not check validity of keys and values during parsing,
2011 // "regular" is considered as unquoted attribute value
2012 assert_eq!(
2013 iter.next(),
2014 Some(Ok(Attribute {
2015 key: QName(b"key"),
2016 value: Cow::Borrowed(b"regular"),
2017 }))
2018 );
2019 // Because we do not check validity of keys and values during parsing,
2020 // "='attribute'" is considered as key-only attribute
2021 assert_eq!(
2022 iter.next(),
2023 Some(Ok(Attribute {
2024 key: QName(b"='attribute'"),
2025 value: Cow::Borrowed(&[]),
2026 }))
2027 );
2028 assert_eq!(iter.next(), None);
2029 assert_eq!(iter.next(), None);
2030
2031 ////////////////////////////////////////////////////////////////////
2032
2033 let mut iter = Attributes::html(r#"tag key= regular = 'attribute'"#, 3);
2034 // 0 ^ = 9 ^ = 19 ^ = 30
2035
2036 // Because we do not check validity of keys and values during parsing,
2037 // "regular" is considered as unquoted attribute value
2038 assert_eq!(
2039 iter.next(),
2040 Some(Ok(Attribute {
2041 key: QName(b"key"),
2042 value: Cow::Borrowed(b"regular"),
2043 }))
2044 );
2045 // Because we do not check validity of keys and values during parsing,
2046 // "=" is considered as key-only attribute
2047 assert_eq!(
2048 iter.next(),
2049 Some(Ok(Attribute {
2050 key: QName(b"="),
2051 value: Cow::Borrowed(&[]),
2052 }))
2053 );
2054 // Because we do not check validity of keys and values during parsing,
2055 // "'attribute'" is considered as key-only attribute
2056 assert_eq!(
2057 iter.next(),
2058 Some(Ok(Attribute {
2059 key: QName(b"'attribute'"),
2060 value: Cow::Borrowed(&[]),
2061 }))
2062 );
2063 assert_eq!(iter.next(), None);
2064 assert_eq!(iter.next(), None);
2065 }
2066 }
2067
2068 /// Copy of single, but with additional spaces in markup
2069 mod sparsed {
2070 use super::*;
2071 use pretty_assertions::assert_eq;
2072
2073 /// Attribute have a value enclosed in single quotes
2074 #[test]
2075 fn single_quoted() {
2076 let mut iter = Attributes::html(r#"tag key = 'value' "#, 3);
2077
2078 assert_eq!(
2079 iter.next(),
2080 Some(Ok(Attribute {
2081 key: QName(b"key"),
2082 value: Cow::Borrowed(b"value"),
2083 }))
2084 );
2085 assert_eq!(iter.next(), None);
2086 assert_eq!(iter.next(), None);
2087 }
2088
2089 /// Attribute have a value enclosed in double quotes
2090 #[test]
2091 fn double_quoted() {
2092 let mut iter = Attributes::html(r#"tag key = "value" "#, 3);
2093
2094 assert_eq!(
2095 iter.next(),
2096 Some(Ok(Attribute {
2097 key: QName(b"key"),
2098 value: Cow::Borrowed(b"value"),
2099 }))
2100 );
2101 assert_eq!(iter.next(), None);
2102 assert_eq!(iter.next(), None);
2103 }
2104
2105 /// Attribute have a value, not enclosed in quotes
2106 #[test]
2107 fn unquoted() {
2108 let mut iter = Attributes::html(r#"tag key = value "#, 3);
2109
2110 assert_eq!(
2111 iter.next(),
2112 Some(Ok(Attribute {
2113 key: QName(b"key"),
2114 value: Cow::Borrowed(b"value"),
2115 }))
2116 );
2117 assert_eq!(iter.next(), None);
2118 assert_eq!(iter.next(), None);
2119 }
2120
2121 /// Only attribute key is present
2122 #[test]
2123 fn key_only() {
2124 let mut iter = Attributes::html(r#"tag key "#, 3);
2125
2126 assert_eq!(
2127 iter.next(),
2128 Some(Ok(Attribute {
2129 key: QName(b"key"),
2130 value: Cow::Borrowed(&[]),
2131 }))
2132 );
2133 assert_eq!(iter.next(), None);
2134 assert_eq!(iter.next(), None);
2135 }
2136
2137 /// Key is started with an invalid symbol (a single quote in this test).
2138 /// Because we do not check validity of keys and values during parsing,
2139 /// that invalid attribute will be returned
2140 #[test]
2141 fn key_start_invalid() {
2142 let mut iter = Attributes::html(r#"tag 'key' = 'value' "#, 3);
2143
2144 assert_eq!(
2145 iter.next(),
2146 Some(Ok(Attribute {
2147 key: QName(b"'key'"),
2148 value: Cow::Borrowed(b"value"),
2149 }))
2150 );
2151 assert_eq!(iter.next(), None);
2152 assert_eq!(iter.next(), None);
2153 }
2154
2155 /// Key contains an invalid symbol (an ampersand in this test).
2156 /// Because we do not check validity of keys and values during parsing,
2157 /// that invalid attribute will be returned
2158 #[test]
2159 fn key_contains_invalid() {
2160 let mut iter = Attributes::html(r#"tag key&jey = 'value' "#, 3);
2161
2162 assert_eq!(
2163 iter.next(),
2164 Some(Ok(Attribute {
2165 key: QName(b"key&jey"),
2166 value: Cow::Borrowed(b"value"),
2167 }))
2168 );
2169 assert_eq!(iter.next(), None);
2170 assert_eq!(iter.next(), None);
2171 }
2172
2173 /// Attribute value is missing after `=`
2174 #[test]
2175 fn missed_value() {
2176 let mut iter = Attributes::html(r#"tag key = "#, 3);
2177 // 0 ^ = 10
2178
2179 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10))));
2180 assert_eq!(iter.next(), None);
2181 assert_eq!(iter.next(), None);
2182 }
2183 }
2184
2185 /// Checks that duplicated attributes correctly reported and recovering is
2186 /// possible after that
2187 mod duplicated {
2188 use super::*;
2189
2190 mod with_check {
2191 use super::*;
2192 use pretty_assertions::assert_eq;
2193
2194 /// Attribute have a value enclosed in single quotes
2195 #[test]
2196 fn single_quoted() {
2197 let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
2198 // 0 ^ = 4 ^ = 16
2199
2200 assert_eq!(
2201 iter.next(),
2202 Some(Ok(Attribute {
2203 key: QName(b"key"),
2204 value: Cow::Borrowed(b"value"),
2205 }))
2206 );
2207 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2208 assert_eq!(
2209 iter.next(),
2210 Some(Ok(Attribute {
2211 key: QName(b"another"),
2212 value: Cow::Borrowed(b""),
2213 }))
2214 );
2215 assert_eq!(iter.next(), None);
2216 assert_eq!(iter.next(), None);
2217 }
2218
2219 /// Attribute have a value enclosed in double quotes
2220 #[test]
2221 fn double_quoted() {
2222 let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
2223 // 0 ^ = 4 ^ = 16
2224
2225 assert_eq!(
2226 iter.next(),
2227 Some(Ok(Attribute {
2228 key: QName(b"key"),
2229 value: Cow::Borrowed(b"value"),
2230 }))
2231 );
2232 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2233 assert_eq!(
2234 iter.next(),
2235 Some(Ok(Attribute {
2236 key: QName(b"another"),
2237 value: Cow::Borrowed(b""),
2238 }))
2239 );
2240 assert_eq!(iter.next(), None);
2241 assert_eq!(iter.next(), None);
2242 }
2243
2244 /// Attribute have a value, not enclosed in quotes
2245 #[test]
2246 fn unquoted() {
2247 let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
2248 // 0 ^ = 4 ^ = 16
2249
2250 assert_eq!(
2251 iter.next(),
2252 Some(Ok(Attribute {
2253 key: QName(b"key"),
2254 value: Cow::Borrowed(b"value"),
2255 }))
2256 );
2257 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2258 assert_eq!(
2259 iter.next(),
2260 Some(Ok(Attribute {
2261 key: QName(b"another"),
2262 value: Cow::Borrowed(b""),
2263 }))
2264 );
2265 assert_eq!(iter.next(), None);
2266 assert_eq!(iter.next(), None);
2267 }
2268
2269 /// Only attribute key is present
2270 #[test]
2271 fn key_only() {
2272 let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3);
2273 // 0 ^ = 4 ^ = 16
2274
2275 assert_eq!(
2276 iter.next(),
2277 Some(Ok(Attribute {
2278 key: QName(b"key"),
2279 value: Cow::Borrowed(b"value"),
2280 }))
2281 );
2282 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2283 assert_eq!(
2284 iter.next(),
2285 Some(Ok(Attribute {
2286 key: QName(b"another"),
2287 value: Cow::Borrowed(b""),
2288 }))
2289 );
2290 assert_eq!(iter.next(), None);
2291 assert_eq!(iter.next(), None);
2292 }
2293 }
2294
2295 /// Check for duplicated names is disabled
2296 mod without_check {
2297 use super::*;
2298 use pretty_assertions::assert_eq;
2299
2300 /// Attribute have a value enclosed in single quotes
2301 #[test]
2302 fn single_quoted() {
2303 let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
2304 iter.with_checks(false);
2305
2306 assert_eq!(
2307 iter.next(),
2308 Some(Ok(Attribute {
2309 key: QName(b"key"),
2310 value: Cow::Borrowed(b"value"),
2311 }))
2312 );
2313 assert_eq!(
2314 iter.next(),
2315 Some(Ok(Attribute {
2316 key: QName(b"key"),
2317 value: Cow::Borrowed(b"dup"),
2318 }))
2319 );
2320 assert_eq!(
2321 iter.next(),
2322 Some(Ok(Attribute {
2323 key: QName(b"another"),
2324 value: Cow::Borrowed(b""),
2325 }))
2326 );
2327 assert_eq!(iter.next(), None);
2328 assert_eq!(iter.next(), None);
2329 }
2330
2331 /// Attribute have a value enclosed in double quotes
2332 #[test]
2333 fn double_quoted() {
2334 let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
2335 iter.with_checks(false);
2336
2337 assert_eq!(
2338 iter.next(),
2339 Some(Ok(Attribute {
2340 key: QName(b"key"),
2341 value: Cow::Borrowed(b"value"),
2342 }))
2343 );
2344 assert_eq!(
2345 iter.next(),
2346 Some(Ok(Attribute {
2347 key: QName(b"key"),
2348 value: Cow::Borrowed(b"dup"),
2349 }))
2350 );
2351 assert_eq!(
2352 iter.next(),
2353 Some(Ok(Attribute {
2354 key: QName(b"another"),
2355 value: Cow::Borrowed(b""),
2356 }))
2357 );
2358 assert_eq!(iter.next(), None);
2359 assert_eq!(iter.next(), None);
2360 }
2361
2362 /// Attribute have a value, not enclosed in quotes
2363 #[test]
2364 fn unquoted() {
2365 let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
2366 iter.with_checks(false);
2367
2368 assert_eq!(
2369 iter.next(),
2370 Some(Ok(Attribute {
2371 key: QName(b"key"),
2372 value: Cow::Borrowed(b"value"),
2373 }))
2374 );
2375 assert_eq!(
2376 iter.next(),
2377 Some(Ok(Attribute {
2378 key: QName(b"key"),
2379 value: Cow::Borrowed(b"dup"),
2380 }))
2381 );
2382 assert_eq!(
2383 iter.next(),
2384 Some(Ok(Attribute {
2385 key: QName(b"another"),
2386 value: Cow::Borrowed(b""),
2387 }))
2388 );
2389 assert_eq!(iter.next(), None);
2390 assert_eq!(iter.next(), None);
2391 }
2392
2393 /// Only attribute key is present
2394 #[test]
2395 fn key_only() {
2396 let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3);
2397 iter.with_checks(false);
2398
2399 assert_eq!(
2400 iter.next(),
2401 Some(Ok(Attribute {
2402 key: QName(b"key"),
2403 value: Cow::Borrowed(b"value"),
2404 }))
2405 );
2406 assert_eq!(
2407 iter.next(),
2408 Some(Ok(Attribute {
2409 key: QName(b"key"),
2410 value: Cow::Borrowed(&[]),
2411 }))
2412 );
2413 assert_eq!(
2414 iter.next(),
2415 Some(Ok(Attribute {
2416 key: QName(b"another"),
2417 value: Cow::Borrowed(b""),
2418 }))
2419 );
2420 assert_eq!(iter.next(), None);
2421 assert_eq!(iter.next(), None);
2422 }
2423 }
2424 }
2425
/// Single- and double-quoted values can be mixed within one tag, and a value
/// may contain the kind of quote that does not delimit it
#[test]
fn mixed_quote() {
    let mut attrs = Attributes::html(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);

    // (key, value) pairs in the order they must be produced
    let expected: [(&[u8], &[u8]); 4] = [
        (b"a", b"a"),
        (b"b", b"b"),
        (b"c", br#"cc"cc"#),
        (b"d", b"dd'dd"),
    ];
    for &(key, value) in expected.iter() {
        assert_eq!(
            attrs.next(),
            Some(Ok(Attribute {
                key: QName(key),
                value: Cow::Borrowed(value),
            }))
        );
    }
    // Once exhausted, the iterator must keep returning `None`
    for _ in 0..2 {
        assert_eq!(attrs.next(), None);
    }
}
2461}