Skip to main content

quick_xml/events/
attributes.rs

1//! Xml Attributes module
2//!
3//! Provides an iterator over attributes key/value pairs
4
5use crate::encoding::Decoder;
6use crate::errors::Result as XmlResult;
7use crate::escape::{escape, resolve_predefined_entity};
8use crate::name::{LocalName, Namespace, NamespaceResolver, QName};
9use crate::utils::{is_whitespace, Bytes};
10use crate::XmlVersion;
11
12use std::collections::HashSet;
13use std::fmt::{self, Debug, Display, Formatter};
14use std::hash::{BuildHasherDefault, DefaultHasher, Hasher};
15use std::iter::FusedIterator;
16use std::{borrow::Cow, ops::Range};
17
18/// A single XML attribute, represented as a key/value pair.
19///
20/// Field `value` stores raw bytes, possibly containing escape sequences. Most users will likely
21/// want to access the value using either the [`normalized_value`] or the [`decoded_and_normalized_value`]
22/// function.
23///
24/// [`normalized_value`]: Self::normalized_value
25/// [`decoded_and_normalized_value`]: Self::decoded_and_normalized_value
26#[derive(Clone, Eq, PartialEq)]
27pub struct Attribute<'a> {
28    /// The key that uniquely identifies the attribute.
29    ///
30    /// If [`Attributes::with_checks`] is turned off, the key might not be unique.
31    pub key: QName<'a>,
32    /// The raw value of the attribute, stored as bytes and possibly still containing escape sequences.
33    pub value: Cow<'a, [u8]>,
34}
35
36impl<'a> Attribute<'a> {
37    /// Returns the attribute value normalized as per [the XML specification] (or [for 1.0]).
38    ///
39    /// The document **must** be UTF-8 encoded, or pre-processed using [`DecodingReader`].
40    ///
41    /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
42    ///
43    /// The following escape sequences are replaced with their unescaped equivalents:
44    ///
45    /// | Escape Sequence | Replacement
46    /// |-----------------|------------
47    /// | `&lt;`          | `<`
48    /// | `&gt;`          | `>`
49    /// | `&amp;`         | `&`
50    /// | `&apos;`        | `'`
51    /// | `&quot;`        | `"`
52    ///
53    /// This will allocate unless the raw attribute value does not require normalization.
54    ///
55    /// Note, although you may use this library to parse HTML, you cannot use this
56    /// method to get HTML content, because its returns normalized value: the following
57    /// sequences are translated into a single space (U+0020) character:
58    ///
59    /// - `\r\n`
60    /// - `\r\x85` (only XML 1.1)
61    /// - `\r`
62    /// - `\n`
63    /// - `\t`
64    /// - `\x85` (only XML 1.1)
65    /// - `\x2028` (only XML 1.1)
66    ///
67    /// The text in HTML normally is not normalized in any way; normalization is
68    /// performed only in limited contexts and [only for] `\r\n` and `\r`.
69    ///
70    /// See also [`normalized_value_with()`](Self::normalized_value_with).
71    ///
72    /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
73    ///
74    /// NOTE: If you are using this in a context where the input is not controlled,
75    /// it is preferred to wrap the input stream in [`DecodingReader`] or to use
76    /// [`decoded_and_normalized_value()`](Self::decoded_and_normalized_value) instead.
77    ///
78    /// </div>
79    ///
80    /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
81    /// [`DecodingReader`]: ../../encoding/struct.DecodingReader.html
82    /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
83    /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
84    pub fn normalized_value(&self, version: XmlVersion) -> XmlResult<Cow<'a, str>> {
85        // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
86        self.normalized_value_with(version, 1, resolve_predefined_entity)
87    }
88
89    /// Returns the attribute value normalized as per [the XML specification] (or [for 1.0]),
90    /// using a custom entity resolver.
91    ///
92    /// The document **must** be UTF-8 encoded, or pre-processed using [`DecodingReader`].
93    ///
94    /// Do not use this method with HTML attributes.
95    ///
96    /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
97    ///
98    /// A function for resolving entities can be provided as `resolve_entity`.
99    /// This method does not resolve any predefined entities, but you can use
100    /// [`resolve_predefined_entity`] in your function.
101    ///
102    /// This will allocate unless the raw attribute value does not require normalization.
103    ///
104    /// Note, although you may use this library to parse HTML, you cannot use this
105    /// method to get HTML content, because its returns normalized value: the following
106    /// sequences are translated into a single space (U+0020) character:
107    ///
108    /// - `\r\n`
109    /// - `\r\x85` (only XML 1.1)
110    /// - `\r`
111    /// - `\n`
112    /// - `\t`
113    /// - `\x85` (only XML 1.1)
114    /// - `\x2028` (only XML 1.1)
115    ///
116    /// The text in HTML normally is not normalized in any way; normalization is
117    /// performed only in limited contexts and [only for] `\r\n` and `\r`.
118    ///
119    /// See also [`normalized_value()`](Self::normalized_value).
120    ///
121    /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
122    ///
123    /// NOTE: If you are using this in a context where the input is not controlled,
124    /// it is preferred to wrap the input stream in [`DecodingReader`] or to use
125    /// [`decoded_and_normalized_value_with()`](Self::decoded_and_normalized_value_with) instead.
126    ///
127    /// </div>
128    ///
129    /// # Parameters
130    ///
131    /// - `depth`: maximum number of nested entities that can be expanded. If expansion
132    ///   chain will be more that this value, the function will return [`EscapeError::TooManyNestedEntities`]
133    /// - `resolve_entity`: a function to resolve entity. This function could be called
134    ///   multiple times on the same input and can return different values in each case
135    ///   for the same input, although it is not recommended
136    ///
137    /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
138    /// [`DecodingReader`]: ../../encoding/struct.DecodingReader.html
139    /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
140    /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
141    /// [`EscapeError::TooManyNestedEntities`]: crate::escape::EscapeError::TooManyNestedEntities
142    pub fn normalized_value_with<'entity>(
143        &self,
144        version: XmlVersion,
145        depth: usize,
146        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
147    ) -> XmlResult<Cow<'a, str>> {
148        use crate::encoding::EncodingError;
149        use std::str::from_utf8;
150
151        let decoded = match &self.value {
152            Cow::Borrowed(bytes) => Cow::Borrowed(from_utf8(bytes).map_err(EncodingError::Utf8)?),
153            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
154            Cow::Owned(bytes) => {
155                Cow::Owned(from_utf8(bytes).map_err(EncodingError::Utf8)?.to_owned())
156            }
157        };
158
159        match version.normalize_attribute_value(&decoded, depth, resolve_entity)? {
160            // Because result is borrowed, no replacements was done and we can use original string
161            Cow::Borrowed(_) => Ok(decoded),
162            Cow::Owned(s) => Ok(s.into()),
163        }
164    }
165
166    /// Decodes using a provided reader and returns the attribute value normalized
167    /// as per [the XML specification] (or [for 1.0]).
168    ///
169    /// Do not use this method with HTML attributes.
170    ///
171    /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
172    ///
173    /// The following escape sequences are replaced with their unescaped equivalents:
174    ///
175    /// | Escape Sequence | Replacement
176    /// |-----------------|------------
177    /// | `&lt;`          | `<`
178    /// | `&gt;`          | `>`
179    /// | `&amp;`         | `&`
180    /// | `&apos;`        | `'`
181    /// | `&quot;`        | `"`
182    ///
183    /// This will allocate unless the raw attribute value does not require normalization.
184    ///
185    /// Note, although you may use this library to parse HTML, you cannot use this
186    /// method to get HTML content, because its returns normalized value: the following
187    /// sequences are translated into a single space (U+0020) character:
188    ///
189    /// - `\r\n`
190    /// - `\r\x85` (only XML 1.1)
191    /// - `\r`
192    /// - `\n`
193    /// - `\t`
194    /// - `\x85` (only XML 1.1)
195    /// - `\x2028` (only XML 1.1)
196    ///
197    /// The text in HTML normally is not normalized in any way; normalization is
198    /// performed only in limited contexts and [only for] `\r\n` and `\r`.
199    ///
200    /// See also [`decoded_and_normalized_value_with()`](#method.decoded_and_normalized_value_with)
201    ///
202    /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
203    /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
204    /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
205    pub fn decoded_and_normalized_value(
206        &self,
207        version: XmlVersion,
208        decoder: Decoder,
209    ) -> XmlResult<Cow<'a, str>> {
210        // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
211        self.decoded_and_normalized_value_with(version, decoder, 1, resolve_predefined_entity)
212    }
213
214    /// Decodes using a provided reader and returns the attribute value normalized
215    /// as per [the XML specification] (or [for 1.0]), using a custom entity resolver.
216    ///
217    /// Do not use this method with HTML attributes.
218    ///
219    /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
220    ///
221    /// A function for resolving entities can be provided as `resolve_entity`.
222    /// This method does not resolve any predefined entities, but you can use
223    /// [`resolve_predefined_entity`] in your function.
224    ///
225    /// This will allocate unless the raw attribute value does not require normalization.
226    ///
227    /// Note, although you may use this library to parse HTML, you cannot use this
228    /// method to get HTML content, because its returns normalized value: the following
229    /// sequences are translated into a single space (U+0020) character:
230    ///
231    /// - `\r\n`
232    /// - `\r\x85` (only XML 1.1)
233    /// - `\r`
234    /// - `\n`
235    /// - `\t`
236    /// - `\x85` (only XML 1.1)
237    /// - `\x2028` (only XML 1.1)
238    ///
239    /// The text in HTML normally is not normalized in any way; normalization is
240    /// performed only in limited contexts and [only for] `\r\n` and `\r`.
241    ///
242    /// See also [`decoded_and_normalized_value()`](#method.decoded_and_normalized_value)
243    ///
244    /// # Parameters
245    ///
246    /// - `depth`: maximum number of nested entities that can be expanded. If expansion
247    ///   chain will be more that this value, the function will return [`EscapeError::TooManyNestedEntities`]
248    /// - `resolve_entity`: a function to resolve entity. This function could be called
249    ///   multiple times on the same input and can return different values in each case
250    ///   for the same input, although it is not recommended
251    ///
252    /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
253    /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
254    /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
255    /// [`EscapeError::TooManyNestedEntities`]: crate::escape::EscapeError::TooManyNestedEntities
256    pub fn decoded_and_normalized_value_with<'entity>(
257        &self,
258        version: XmlVersion,
259        decoder: Decoder,
260        depth: usize,
261        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
262    ) -> XmlResult<Cow<'a, str>> {
263        let decoded = match &self.value {
264            Cow::Borrowed(bytes) => decoder.decode(bytes)?,
265            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
266            Cow::Owned(bytes) => decoder.decode(bytes)?.into_owned().into(),
267        };
268
269        match version.normalize_attribute_value(&decoded, depth, resolve_entity)? {
270            // Because result is borrowed, no replacements was done and we can use original string
271            Cow::Borrowed(_) => Ok(decoded),
272            Cow::Owned(s) => Ok(s.into()),
273        }
274    }
275
276    /// Returns the unescaped value.
277    ///
278    /// This is normally the value you are interested in. Escape sequences such as `&gt;` are
279    /// replaced with their unescaped equivalents such as `>`.
280    ///
281    /// This will allocate if the value contains any escape sequences.
282    ///
283    /// See also [`unescape_value_with()`](Self::unescape_value_with)
284    ///
285    /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
286    ///
287    /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
288    /// should only be used by applications.
289    /// Libs should use [`decoded_and_normalized_value()`](Self::decoded_and_normalized_value)
290    /// instead, because if lib will be used in a project which depends on quick_xml with
291    /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
292    ///
293    /// </div>
294    ///
295    /// [`encoding`]: ../../index.html#encoding
296    /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
297    #[cfg(any(doc, not(feature = "encoding")))]
298    #[deprecated = "use `Self::normalized_value()`"]
299    pub fn unescape_value(&self) -> XmlResult<Cow<'a, str>> {
300        // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
301        self.normalized_value_with(XmlVersion::Implicit1_0, 1, resolve_predefined_entity)
302    }
303
304    /// Decodes using UTF-8 then unescapes the value, using custom entities.
305    ///
306    /// This is normally the value you are interested in. Escape sequences such as `&gt;` are
307    /// replaced with their unescaped equivalents such as `>`.
308    /// A fallback resolver for additional custom entities can be provided via
309    /// `resolve_entity`.
310    ///
311    /// This will allocate if the value contains any escape sequences.
312    ///
313    /// See also [`unescape_value()`](Self::unescape_value)
314    ///
315    /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
316    ///
317    /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
318    /// should only be used by applications.
319    /// Libs should use [`decoded_and_normalized_value_with()`](Self::decoded_and_normalized_value_with)
320    /// instead, because if lib will be used in a project which depends on quick_xml with
321    /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
322    ///
323    /// </div>
324    ///
325    /// [`encoding`]: ../../index.html#encoding
326    /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
327    #[cfg(any(doc, not(feature = "encoding")))]
328    #[deprecated = "use `Self::normalized_value_with()`"]
329    #[inline]
330    pub fn unescape_value_with<'entity>(
331        &self,
332        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
333    ) -> XmlResult<Cow<'a, str>> {
334        self.normalized_value_with(XmlVersion::Implicit1_0, 128, resolve_entity)
335    }
336
337    /// Decodes then unescapes the value.
338    ///
339    /// This will allocate if the value contains any escape sequences or in
340    /// non-UTF-8 encoding.
341    #[deprecated = "use `Self::decoded_and_normalized_value()`"]
342    pub fn decode_and_unescape_value(&self, decoder: Decoder) -> XmlResult<Cow<'a, str>> {
343        // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
344        self.decoded_and_normalized_value_with(
345            XmlVersion::Implicit1_0,
346            decoder,
347            1,
348            resolve_predefined_entity,
349        )
350    }
351
352    /// Decodes then unescapes the value with custom entities.
353    ///
354    /// This will allocate if the value contains any escape sequences or in
355    /// non-UTF-8 encoding.
356    #[deprecated = "use `Self::decoded_and_normalized_value_with()`"]
357    pub fn decode_and_unescape_value_with<'entity>(
358        &self,
359        decoder: Decoder,
360        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
361    ) -> XmlResult<Cow<'a, str>> {
362        self.decoded_and_normalized_value_with(
363            XmlVersion::Implicit1_0,
364            decoder,
365            128,
366            resolve_entity,
367        )
368    }
369
370    /// If attribute value [represents] valid boolean values, returns `Some`, otherwise returns `None`.
371    ///
372    /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`.
373    ///
374    /// # Examples
375    ///
376    /// ```
377    /// # use pretty_assertions::assert_eq;
378    /// use quick_xml::events::attributes::Attribute;
379    ///
380    /// let attr = Attribute::from(("attr", "false"));
381    /// assert_eq!(attr.as_bool(), Some(false));
382    ///
383    /// let attr = Attribute::from(("attr", "0"));
384    /// assert_eq!(attr.as_bool(), Some(false));
385    ///
386    /// let attr = Attribute::from(("attr", "true"));
387    /// assert_eq!(attr.as_bool(), Some(true));
388    ///
389    /// let attr = Attribute::from(("attr", "1"));
390    /// assert_eq!(attr.as_bool(), Some(true));
391    ///
392    /// let attr = Attribute::from(("attr", "not bool"));
393    /// assert_eq!(attr.as_bool(), None);
394    /// ```
395    ///
396    /// [represents]: https://www.w3.org/TR/xmlschema11-2/#boolean
397    #[inline]
398    pub fn as_bool(&self) -> Option<bool> {
399        match self.value.as_ref() {
400            b"1" | b"true" => Some(true),
401            b"0" | b"false" => Some(false),
402            _ => None,
403        }
404    }
405}
406
407impl<'a> Debug for Attribute<'a> {
408    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
409        f.debug_struct("Attribute")
410            .field("key", &Bytes(self.key.as_ref()))
411            .field("value", &Bytes(&self.value))
412            .finish()
413    }
414}
415
416impl<'a> From<(&'a [u8], &'a [u8])> for Attribute<'a> {
417    /// Creates new attribute from raw bytes.
418    /// Does not apply any transformation to both key and value.
419    ///
420    /// # Examples
421    ///
422    /// ```
423    /// # use pretty_assertions::assert_eq;
424    /// use quick_xml::events::attributes::Attribute;
425    ///
426    /// let features = Attribute::from(("features".as_bytes(), "Bells &amp; whistles".as_bytes()));
427    /// assert_eq!(features.value, "Bells &amp; whistles".as_bytes());
428    /// ```
429    fn from(val: (&'a [u8], &'a [u8])) -> Attribute<'a> {
430        Attribute {
431            key: QName(val.0),
432            value: Cow::from(val.1),
433        }
434    }
435}
436
437impl<'a> From<(&'a str, &'a str)> for Attribute<'a> {
438    /// Creates new attribute from text representation.
439    /// Key is stored as-is, but the value will be escaped.
440    ///
441    /// # Examples
442    ///
443    /// ```
444    /// # use pretty_assertions::assert_eq;
445    /// use quick_xml::events::attributes::Attribute;
446    ///
447    /// let features = Attribute::from(("features", "Bells & whistles"));
448    /// assert_eq!(features.value, "Bells &amp; whistles".as_bytes());
449    /// ```
450    fn from(val: (&'a str, &'a str)) -> Attribute<'a> {
451        Attribute {
452            key: QName(val.0.as_bytes()),
453            value: match escape(val.1) {
454                Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
455                Cow::Owned(s) => Cow::Owned(s.into_bytes()),
456            },
457        }
458    }
459}
460
461impl<'a> From<(&'a str, Cow<'a, str>)> for Attribute<'a> {
462    /// Creates new attribute from text representation.
463    /// Key is stored as-is, but the value will be escaped.
464    ///
465    /// # Examples
466    ///
467    /// ```
468    /// # use std::borrow::Cow;
469    /// use pretty_assertions::assert_eq;
470    /// use quick_xml::events::attributes::Attribute;
471    ///
472    /// let features = Attribute::from(("features", Cow::Borrowed("Bells & whistles")));
473    /// assert_eq!(features.value, "Bells &amp; whistles".as_bytes());
474    /// ```
475    fn from(val: (&'a str, Cow<'a, str>)) -> Attribute<'a> {
476        Attribute {
477            key: QName(val.0.as_bytes()),
478            value: match escape(val.1) {
479                Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
480                Cow::Owned(s) => Cow::Owned(s.into_bytes()),
481            },
482        }
483    }
484}
485
486impl<'a> From<Attr<&'a [u8]>> for Attribute<'a> {
487    #[inline]
488    fn from(attr: Attr<&'a [u8]>) -> Self {
489        Self {
490            key: attr.key(),
491            value: Cow::Borrowed(attr.value()),
492        }
493    }
494}
495
496////////////////////////////////////////////////////////////////////////////////////////////////////
497
498/// Iterator over XML attributes.
499///
500/// Yields `Result<Attribute, AttrError>`. An `Err` will be yielded if an attribute is malformed or duplicated.
501/// The duplicate check can be turned off by calling [`with_checks(false)`].
502///
503/// When the [`serialize`] feature is enabled, it can be converted to serde's deserializer.
504///
505/// [`with_checks(false)`]: Self::with_checks
506/// [`serialize`]: ../../index.html#serialize
507#[derive(Clone)]
508pub struct Attributes<'a> {
509    /// Slice of `BytesStart` corresponding to attributes
510    bytes: &'a [u8],
511    /// Iterator state, independent of the actual source of bytes
512    state: IterState,
513    /// Decoder describing the encoding used for `bytes`
514    decoder: Decoder,
515}
516
517impl<'a> Attributes<'a> {
518    /// Internal constructor, used by `BytesStart`. Supplies data in reader's encoding
519    #[inline]
520    pub(crate) const fn wrap(buf: &'a [u8], pos: usize, html: bool, decoder: Decoder) -> Self {
521        Self {
522            bytes: buf,
523            state: IterState::new(pos, html),
524            decoder,
525        }
526    }
527
528    /// Creates a new attribute iterator from a buffer, which recognizes only XML-style
529    /// attributes, i. e. those which in the form `name = "value"` or `name = 'value'`.
530    /// HTML style attributes (i. e. without quotes or only name) will return a error.
531    ///
532    /// # Parameters
533    /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
534    ///   string between `<` and `>` (or `/>`) of a tag;
535    /// - `pos`: a position in the `buf` where tag name is finished and attributes
536    ///   is started. It is not necessary to point exactly to the end of a tag name,
537    ///   although that is usually that. If it will be more than the `buf` length,
538    ///   then the iterator will return `None`` immediately.
539    ///
540    /// # Example
541    /// ```
542    /// # use quick_xml::events::attributes::{Attribute, Attributes};
543    /// # use pretty_assertions::assert_eq;
544    /// #
545    /// let mut iter = Attributes::new("tag-name attr1 = 'value1' attr2='value2' ", 9);
546    /// //                              ^0       ^9
547    /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
548    /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "value2")))));
549    /// assert_eq!(iter.next(), None);
550    /// ```
551    pub const fn new(buf: &'a str, pos: usize) -> Self {
552        Self::wrap(buf.as_bytes(), pos, false, Decoder::utf8())
553    }
554
555    /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax.
556    ///
557    /// # Parameters
558    /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
559    ///   string between `<` and `>` (or `/>`) of a tag;
560    /// - `pos`: a position in the `buf` where tag name is finished and attributes
561    ///   is started. It is not necessary to point exactly to the end of a tag name,
562    ///   although that is usually that. If it will be more than the `buf` length,
563    ///   then the iterator will return `None`` immediately.
564    ///
565    /// # Example
566    /// ```
567    /// # use quick_xml::events::attributes::{Attribute, Attributes};
568    /// # use pretty_assertions::assert_eq;
569    /// #
570    /// let mut iter = Attributes::html("tag-name attr1 = value1 attr2 ", 9);
571    /// //                               ^0       ^9
572    /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
573    /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "")))));
574    /// assert_eq!(iter.next(), None);
575    /// ```
576    pub const fn html(buf: &'a str, pos: usize) -> Self {
577        Self::wrap(buf.as_bytes(), pos, true, Decoder::utf8())
578    }
579
580    /// Changes whether attributes should be checked for uniqueness.
581    ///
582    /// The XML specification requires attribute keys in the same element to be unique. This check
583    /// can be disabled to improve performance slightly.
584    ///
585    /// (`true` by default)
586    pub fn with_checks(&mut self, val: bool) -> &mut Attributes<'a> {
587        self.state.check_duplicates = val;
588        self
589    }
590
591    /// Checks if the current tag has a [`xsi:nil`] attribute. This method ignores any errors in
592    /// attributes.
593    ///
594    /// # Examples
595    ///
596    /// ```
597    /// # use pretty_assertions::assert_eq;
598    /// use quick_xml::events::Event;
599    /// use quick_xml::name::QName;
600    /// use quick_xml::reader::NsReader;
601    ///
602    /// let mut reader = NsReader::from_str("
603    ///     <root xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>
604    ///         <true xsi:nil='true'/>
605    ///         <false xsi:nil='false'/>
606    ///         <none/>
607    ///         <non-xsi xsi:nil='true' xmlns:xsi='namespace'/>
608    ///         <unbound-nil nil='true' xmlns='http://www.w3.org/2001/XMLSchema-instance'/>
609    ///         <another-xmlns f:nil='true' xmlns:f='http://www.w3.org/2001/XMLSchema-instance'/>
610    ///     </root>
611    /// ");
612    /// reader.config_mut().trim_text(true);
613    ///
614    /// macro_rules! check {
615    ///     ($reader:expr, $name:literal, $value:literal) => {
616    ///         let event = match $reader.read_event().unwrap() {
617    ///             Event::Empty(e) => e,
618    ///             e => panic!("Unexpected event {:?}", e),
619    ///         };
620    ///         assert_eq!(
621    ///             (event.name(), event.attributes().has_nil($reader.resolver())),
622    ///             (QName($name.as_bytes()), $value),
623    ///         );
624    ///     };
625    /// }
626    ///
627    /// let root = match reader.read_event().unwrap() {
628    ///     Event::Start(e) => e,
629    ///     e => panic!("Unexpected event {:?}", e),
630    /// };
631    /// assert_eq!(root.attributes().has_nil(reader.resolver()), false);
632    ///
633    /// // definitely true
634    /// check!(reader, "true",          true);
635    /// // definitely false
636    /// check!(reader, "false",         false);
637    /// // absence of the attribute means that attribute is not set
638    /// check!(reader, "none",          false);
639    /// // attribute not bound to the correct namespace
640    /// check!(reader, "non-xsi",       false);
641    /// // attributes without prefix not bound to any namespace
642    /// check!(reader, "unbound-nil",   false);
643    /// // prefix can be any while it is bound to the correct namespace
644    /// check!(reader, "another-xmlns", true);
645    /// ```
646    ///
647    /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil
648    pub fn has_nil(&mut self, resolver: &NamespaceResolver) -> bool {
649        use crate::name::ResolveResult::*;
650
651        self.any(|attr| {
652            if let Ok(attr) = attr {
653                match resolver.resolve_attribute(attr.key) {
654                    (
655                        Bound(Namespace(b"http://www.w3.org/2001/XMLSchema-instance")),
656                        LocalName(b"nil"),
657                    ) => attr.as_bool().unwrap_or_default(),
658                    _ => false,
659                }
660            } else {
661                false
662            }
663        })
664    }
665
666    /// Get the decoder, used to decode bytes, read by the reader which produces
667    /// this iterator, to the strings.
668    ///
669    /// When iterator was created manually or get from a manually created [`BytesStart`],
670    /// encoding is UTF-8.
671    ///
672    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
673    /// defaults to UTF-8.
674    ///
675    /// [`BytesStart`]: crate::events::BytesStart
676    /// [`encoding`]: ../index.html#encoding
677    #[inline]
678    pub const fn decoder(&self) -> Decoder {
679        self.decoder
680    }
681}
682
683impl<'a> Debug for Attributes<'a> {
684    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
685        f.debug_struct("Attributes")
686            .field("bytes", &Bytes(self.bytes))
687            .field("state", &self.state)
688            .field("decoder", &self.decoder)
689            .finish()
690    }
691}
692
693impl<'a> Iterator for Attributes<'a> {
694    type Item = Result<Attribute<'a>, AttrError>;
695
696    #[inline]
697    fn next(&mut self) -> Option<Self::Item> {
698        match self.state.next(self.bytes) {
699            None => None,
700            Some(Ok(a)) => Some(Ok(a.map(|range| &self.bytes[range]).into())),
701            Some(Err(e)) => Some(Err(e)),
702        }
703    }
704}
705
706impl<'a> FusedIterator for Attributes<'a> {}
707
708////////////////////////////////////////////////////////////////////////////////////////////////////
709
/// Errors that can be raised during parsing attributes.
///
/// Recovery position in examples shows the position from which parsing of the
/// next attribute will be attempted.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AttrError {
    /// Attribute key was not followed by `=`, position relative to the start of
    /// the owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key another="attribute"/>
    /// <!--     ^~~ error position, recovery position (8) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    ExpectedEq(usize),
    /// Attribute value was not found after `=`, position relative to the start
    /// of the owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = />
    /// <!--       ^~~ error position, recovery position (10) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because otherwise any content after `=` will be treated as a value.
    /// The XML
    ///
    /// ```xml
    /// <tag key = another-key = "value"/>
    /// <!--                   ^ ^- recovery position (24) -->
    /// <!--                   '~~ error position (22) -->
    /// ```
    ///
    /// will be treated as `Attribute { key = b"key", value = b"another-key" }`
    /// and either an [`Attribute`] is returned or [`AttrError::UnquotedValue`]
    /// is raised, depending on the parsing mode.
    ExpectedValue(usize),
    /// Attribute value is not quoted, position relative to the start of the
    /// owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = value />
    /// <!--       ^    ^~~ recovery position (15) -->
    /// <!--       '~~ error position (10) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    UnquotedValue(usize),
    /// Attribute value was not finished with a matching quote, position relative
    /// to the start of owning tag and a quote is provided. That position is always
    /// the last character in the tag content.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = "value  />
    /// <tag key = 'value  />
    /// <!--               ^~~ error position, recovery position (18) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because all input was consumed during scanning for a quote.
    ExpectedQuote(usize, u8),
    /// An attribute with the same name was already encountered. Two parameters
    /// define (1) the error position relative to the start of the owning tag
    /// for a new attribute and (2) the start position of a previously encountered
    /// attribute with the same name.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = 'value'  key="value2" attr3='value3' />
    /// <!-- ^              ^            ^~~ recovery position (32) -->
    /// <!-- |              '~~ error position (19) -->
    /// <!-- '~~ previous position (4) -->
    /// ```
    ///
    /// This error is returned only when [`Attributes::with_checks()`] is set
    /// to `true` (that is the default behavior).
    Duplicated(usize, usize),
}
798
799impl Display for AttrError {
800    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
801        match self {
802            Self::ExpectedEq(pos) => write!(
803                f,
804                r#"position {}: attribute key must be directly followed by `=` or space"#,
805                pos
806            ),
807            Self::ExpectedValue(pos) => write!(
808                f,
809                r#"position {}: `=` must be followed by an attribute value"#,
810                pos
811            ),
812            Self::UnquotedValue(pos) => write!(
813                f,
814                r#"position {}: attribute value must be enclosed in `"` or `'`"#,
815                pos
816            ),
817            Self::ExpectedQuote(pos, quote) => write!(
818                f,
819                r#"position {}: missing closing quote `{}` in attribute value"#,
820                pos, *quote as char
821            ),
822            Self::Duplicated(pos1, pos2) => write!(
823                f,
824                r#"position {}: duplicated attribute, previous declaration at position {}"#,
825                pos1, pos2
826            ),
827        }
828    }
829}
830
// The impl body is empty, so only the provided (default) methods of `Error`
// are used.
impl std::error::Error for AttrError {}
832
833////////////////////////////////////////////////////////////////////////////////////////////////////
834
/// A struct representing a key/value XML or HTML [attribute].
///
/// The variant records how the attribute was written in the source (kind of
/// quoting, or absence of a value); the payload is generic so the same shape
/// can carry spans or slices.
///
/// [attribute]: https://www.w3.org/TR/xml11/#NT-Attribute
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Attr<T> {
    /// Attribute with value enclosed in double quotes (`"`). Attribute key and
    /// value provided. This is a canonical XML-style attribute.
    DoubleQ(T, T),
    /// Attribute with value enclosed in single quotes (`'`). Attribute key and
    /// value provided. This is an XML-style attribute.
    SingleQ(T, T),
    /// Attribute with value not enclosed in quotes. Attribute key and value
    /// provided. This is an HTML-style attribute, it can be returned in HTML-mode
    /// parsing only. In an XML mode [`AttrError::UnquotedValue`] will be raised
    /// instead.
    ///
    /// Attribute value can be invalid according to the [HTML specification],
    /// in particular, it can contain `"`, `'`, `=`, `<`, and <code>&#96;</code>
    /// characters. The absence of the `>` character is nevertheless guaranteed,
    /// since the parser extracts [events] based on them even before the start
    /// of parsing attributes.
    ///
    /// [HTML specification]: https://html.spec.whatwg.org/#unquoted
    /// [events]: crate::events::Event::Start
    Unquoted(T, T),
    /// Attribute without value. Attribute key provided. This is an HTML-style
    /// attribute, it can be returned in HTML-mode parsing only. In XML mode
    /// [`AttrError::ExpectedEq`] will be raised instead.
    Empty(T),
}
865
866impl<T> Attr<T> {
867    /// Maps an `Attr<T>` to `Attr<U>` by applying a function to a contained key and value.
868    #[inline]
869    pub fn map<U, F>(self, mut f: F) -> Attr<U>
870    where
871        F: FnMut(T) -> U,
872    {
873        match self {
874            Attr::DoubleQ(key, value) => Attr::DoubleQ(f(key), f(value)),
875            Attr::SingleQ(key, value) => Attr::SingleQ(f(key), f(value)),
876            Attr::Empty(key) => Attr::Empty(f(key)),
877            Attr::Unquoted(key, value) => Attr::Unquoted(f(key), f(value)),
878        }
879    }
880}
881
882impl<'a> Attr<&'a [u8]> {
883    /// Returns the key value
884    #[inline]
885    pub const fn key(&self) -> QName<'a> {
886        QName(match self {
887            Attr::DoubleQ(key, _) => key,
888            Attr::SingleQ(key, _) => key,
889            Attr::Empty(key) => key,
890            Attr::Unquoted(key, _) => key,
891        })
892    }
893    /// Returns the attribute value. For [`Self::Empty`] variant an empty slice
894    /// is returned according to the [HTML specification].
895    ///
896    /// [HTML specification]: https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#syntax-attr-empty
897    #[inline]
898    pub const fn value(&self) -> &'a [u8] {
899        match self {
900            Attr::DoubleQ(_, value) => value,
901            Attr::SingleQ(_, value) => value,
902            Attr::Empty(_) => &[],
903            Attr::Unquoted(_, value) => value,
904        }
905    }
906}
907
908impl<T: AsRef<[u8]>> Debug for Attr<T> {
909    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
910        match self {
911            Attr::DoubleQ(key, value) => f
912                .debug_tuple("Attr::DoubleQ")
913                .field(&Bytes(key.as_ref()))
914                .field(&Bytes(value.as_ref()))
915                .finish(),
916            Attr::SingleQ(key, value) => f
917                .debug_tuple("Attr::SingleQ")
918                .field(&Bytes(key.as_ref()))
919                .field(&Bytes(value.as_ref()))
920                .finish(),
921            Attr::Empty(key) => f
922                .debug_tuple("Attr::Empty")
923                // Comment to prevent formatting and keep style consistent
924                .field(&Bytes(key.as_ref()))
925                .finish(),
926            Attr::Unquoted(key, value) => f
927                .debug_tuple("Attr::Unquoted")
928                .field(&Bytes(key.as_ref()))
929                .field(&Bytes(value.as_ref()))
930                .finish(),
931        }
932    }
933}
934
935/// Unpacks attribute key and value into tuple of this two elements.
936/// `None` value element is returned only for [`Attr::Empty`] variant.
937impl<T> From<Attr<T>> for (T, Option<T>) {
938    #[inline]
939    fn from(attr: Attr<T>) -> Self {
940        match attr {
941            Attr::DoubleQ(key, value) => (key, Some(value)),
942            Attr::SingleQ(key, value) => (key, Some(value)),
943            Attr::Empty(key) => (key, None),
944            Attr::Unquoted(key, value) => (key, Some(value)),
945        }
946    }
947}
948
949////////////////////////////////////////////////////////////////////////////////////////////////////
950
/// Result of one parsing step: the attribute with its key and value given as
/// byte ranges, or the error that was detected.
type AttrResult = Result<Attr<Range<usize>>, AttrError>;
952
/// What has to be done before the next attribute can be parsed.
#[derive(Clone, Copy, Debug)]
enum State {
    /// Iteration finished, iterator will return `None` to all [`IterState::next`]
    /// requests.
    Done,
    /// The last attribute returned was deserialized successfully. Contains an
    /// offset from which the next attribute should be searched.
    Next(usize),
    /// The last attribute returned [`AttrError::UnquotedValue`], offset points
    /// to the beginning of the value. Recovery should skip the value.
    SkipValue(usize),
    /// The last attribute returned [`AttrError::Duplicated`], offset points to
    /// the equal (`=`) sign. Recovery should skip it and the value.
    SkipEqValue(usize),
}
968
/// Number of attributes a start tag may have before the duplicate-name check
/// switches from a direct linear scan of the previously seen names to a hash
/// pre-filter (see [`IterState::check_for_duplicates`]).
///
/// Real-world start tags carry only a handful of attributes -- the busiest
/// element in our benchmark corpus (`tests/documents/players.xml`) has 22 --
/// where the scan is faster than hashing and needs no allocation. Larger tags
/// are where the scan became the O(N²) CPU-DoS of [#969], so above this count we
/// pay for a hash set to keep the whole tag O(N). The value sits just above the
/// measured linear-vs-hash crossover.
///
/// The switch is made when the number of already recorded names is
/// `>= SMALL_ATTRIBUTE_COUNT`, that is, starting with the attribute that
/// follows the first `SMALL_ATTRIBUTE_COUNT` distinct names.
///
/// [#969]: https://github.com/tafia/quick-xml/issues/969
const SMALL_ATTRIBUTE_COUNT: usize = 32;
982
/// Pass-through [`Hasher`] used by the `key_hashes` set. The set stores values
/// that are already 64-bit hashes of attribute names, so hashing them a second
/// time would be wasted work: the last `u64` written is returned as the hash.
///
/// Only `u64` keys are supported; they reach the hasher through `write_u64`.
#[derive(Default)]
struct IdentityHasher(u64);

impl Hasher for IdentityHasher {
    /// Remembers `value`; it becomes the result of [`Hasher::finish`].
    #[inline]
    fn write_u64(&mut self, value: u64) {
        self.0 = value;
    }

    /// Returns the last value passed to `write_u64` (`0` if there was none).
    #[inline]
    fn finish(&self) -> u64 {
        let IdentityHasher(stored) = *self;
        stored
    }

    /// Generic byte input is not supported and panics.
    #[inline]
    fn write(&mut self, _: &[u8]) {
        // `u64` keys never get here: their `Hash` impl calls `write_u64`.
        unreachable!("IdentityHasher only supports u64 keys")
    }
}
1006
/// Computes the 64-bit hash of one attribute name.
///
/// Every call starts from a freshly constructed [`DefaultHasher`] and feeds it
/// only the bytes of `name`, so the result does not depend on names hashed
/// before.
///
/// NOTE(review): a `DefaultHasher` built this way is not randomly keyed, so the
/// values are predictable for a given toolchain -- confirm whether that is
/// acceptable for untrusted input. A collision only makes
/// `IterState::check_for_duplicates_hashed` fall back to comparing names.
#[inline]
fn hash_name(name: &[u8]) -> u64 {
    let mut state = DefaultHasher::default();
    Hasher::write(&mut state, name);
    Hasher::finish(&state)
}
1015
/// External iterator over spans of attribute key and value.
///
/// "External" means that the content of the tag is not stored here: it is
/// passed to every [`IterState::next`] call, and the returned spans are
/// ranges into that slice.
#[derive(Clone, Debug)]
pub(crate) struct IterState {
    /// Iteration state that determines what actions should be done before the
    /// actual parsing of the next attribute
    state: State,
    /// If `true`, enables ability to parse unquoted values and key-only (empty)
    /// attributes
    html: bool,
    /// If `true`, checks for duplicate names
    check_duplicates: bool,
    /// If `check_duplicates` is set, contains the ranges of already parsed attribute
    /// names. We store ranges instead of slices to be able to report the position
    /// of a previous attribute
    keys: Vec<Range<usize>>,
    /// 64-bit hashes of the byte content of `keys`, used as an O(1) pre-filter
    /// once a start tag declares more than `SMALL_ATTRIBUTE_COUNT` attributes, so
    /// the duplicate check stays O(N) over the whole tag instead of O(N²). The
    /// values are already hashes, so the set stores them with `IdentityHasher`
    /// instead of re-hashing. Allocated only when the threshold is crossed, so
    /// small tags (and [`IterState::new`]) stay allocation-free and `const`.
    key_hashes: Option<HashSet<u64, BuildHasherDefault<IdentityHasher>>>,
}
1039
1040impl IterState {
1041    pub const fn new(offset: usize, html: bool) -> Self {
1042        Self {
1043            state: State::Next(offset),
1044            html,
1045            check_duplicates: true,
1046            keys: Vec::new(),
1047            key_hashes: None,
1048        }
1049    }
1050
1051    /// Recover from an error that could have been made on a previous step.
1052    /// Returns an offset from which parsing should continue.
1053    /// If there no input left, returns `None`.
1054    fn recover(&self, slice: &[u8]) -> Option<usize> {
1055        match self.state {
1056            State::Done => None,
1057            State::Next(offset) => Some(offset),
1058            State::SkipValue(offset) => self.skip_value(slice, offset),
1059            State::SkipEqValue(offset) => self.skip_eq_value(slice, offset),
1060        }
1061    }
1062
1063    /// Skip all characters up to first space symbol or end-of-input
1064    #[inline]
1065    #[allow(clippy::manual_map)]
1066    fn skip_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
1067        let mut iter = (offset..).zip(slice[offset..].iter());
1068
1069        match iter.find(|(_, &b)| is_whitespace(b)) {
1070            // Input: `    key  =  value `
1071            //                     |    ^
1072            //                offset    e
1073            Some((e, _)) => Some(e),
1074            // Input: `    key  =  value`
1075            //                     |    ^
1076            //                offset    e = len()
1077            None => None,
1078        }
1079    }
1080
1081    /// Skip all characters up to first space symbol or end-of-input
1082    #[inline]
1083    fn skip_eq_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
1084        let mut iter = (offset..).zip(slice[offset..].iter());
1085
1086        // Skip all up to the quote and get the quote type
1087        let quote = match iter.find(|(_, &b)| !is_whitespace(b)) {
1088            // Input: `    key  =  "`
1089            //                  |  ^
1090            //             offset
1091            Some((_, b'"')) => b'"',
1092            // Input: `    key  =  '`
1093            //                  |  ^
1094            //             offset
1095            Some((_, b'\'')) => b'\'',
1096
1097            // Input: `    key  =  x`
1098            //                  |  ^
1099            //             offset
1100            Some((offset, _)) => return self.skip_value(slice, offset),
1101            // Input: `    key  =  `
1102            //                  |  ^
1103            //             offset
1104            None => return None,
1105        };
1106
1107        match iter.find(|(_, &b)| b == quote) {
1108            // Input: `    key  =  "   "`
1109            //                         ^
1110            Some((e, b'"')) => Some(e),
1111            // Input: `    key  =  '   '`
1112            //                         ^
1113            Some((e, _)) => Some(e),
1114
1115            // Input: `    key  =  "   `
1116            // Input: `    key  =  '   `
1117            //                         ^
1118            // Closing quote not found
1119            None => None,
1120        }
1121    }
1122
1123    /// Checks that the attribute name `key` (a range into `slice`) was not seen
1124    /// earlier in the same start tag, recording it for subsequent checks.
1125    ///
1126    /// Small tags use a direct linear scan of [`Self::keys`]: for a handful of
1127    /// attributes that beats hashing and needs no allocation, which is the
1128    /// overwhelmingly common case. Once a tag declares more than
1129    /// `SMALL_ATTRIBUTE_COUNT` attributes -- where the scan would become the
1130    /// O(N²) CPU-DoS of [#969] -- it switches to a hash pre-filter that keeps the
1131    /// whole tag O(N).
1132    ///
1133    /// [#969]: https://github.com/tafia/quick-xml/issues/969
1134    #[inline]
1135    fn check_for_duplicates(
1136        &mut self,
1137        slice: &[u8],
1138        key: Range<usize>,
1139    ) -> Result<Range<usize>, AttrError> {
1140        if self.check_duplicates {
1141            if self.keys.len() >= SMALL_ATTRIBUTE_COUNT {
1142                return self.check_for_duplicates_hashed(slice, key);
1143            }
1144            if let Some(prev) = self
1145                .keys
1146                .iter()
1147                .find(|r| slice[(*r).clone()] == slice[key.clone()])
1148            {
1149                return Err(AttrError::Duplicated(key.start, prev.start));
1150            }
1151            self.keys.push(key.clone());
1152        }
1153        Ok(key)
1154    }
1155
1156    /// Cold path of [`Self::check_for_duplicates`] for start tags with many
1157    /// attributes: a [`HashSet`] of 64-bit name hashes acts as an O(1) pre-filter
1158    /// so iterating N attributes is O(N) rather than O(N²).
1159    #[cold]
1160    fn check_for_duplicates_hashed(
1161        &mut self,
1162        slice: &[u8],
1163        key: Range<usize>,
1164    ) -> Result<Range<usize>, AttrError> {
1165        let keys = &self.keys;
1166        let key_hashes = self.key_hashes.get_or_insert_with(|| {
1167            // First time over the threshold: seed the set with the names already
1168            // collected during the linear phase so the pre-filter knows them.
1169            let mut set = HashSet::with_capacity_and_hasher(
1170                keys.len() * 2,
1171                BuildHasherDefault::<IdentityHasher>::default(),
1172            );
1173            for r in keys {
1174                set.insert(hash_name(&slice[r.clone()]));
1175            }
1176            set
1177        });
1178        // A fresh hash proves the name is new. On a hit (a real duplicate, or the
1179        // astronomically rare 64-bit collision) fall back to the linear scan to
1180        // recover the exact previous position for `AttrError::Duplicated`.
1181        if !key_hashes.insert(hash_name(&slice[key.clone()])) {
1182            if let Some(prev) = self
1183                .keys
1184                .iter()
1185                .find(|r| slice[(*r).clone()] == slice[key.clone()])
1186            {
1187                return Err(AttrError::Duplicated(key.start, prev.start));
1188            }
1189        }
1190        self.keys.push(key.clone());
1191        Ok(key)
1192    }
1193
1194    /// # Parameters
1195    ///
1196    /// - `slice`: content of the tag, used for checking for duplicates
1197    /// - `key`: Range of key in slice, if iterator in HTML mode
1198    /// - `offset`: Position of error if iterator in XML mode
1199    #[inline]
1200    fn key_only(&mut self, slice: &[u8], key: Range<usize>, offset: usize) -> Option<AttrResult> {
1201        Some(if self.html {
1202            self.check_for_duplicates(slice, key).map(Attr::Empty)
1203        } else {
1204            Err(AttrError::ExpectedEq(offset))
1205        })
1206    }
1207
1208    #[inline]
1209    fn double_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
1210        self.state = State::Next(value.end + 1); // +1 for `"`
1211
1212        Some(Ok(Attr::DoubleQ(key, value)))
1213    }
1214
1215    #[inline]
1216    fn single_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
1217        self.state = State::Next(value.end + 1); // +1 for `'`
1218
1219        Some(Ok(Attr::SingleQ(key, value)))
1220    }
1221
1222    pub fn next(&mut self, slice: &[u8]) -> Option<AttrResult> {
1223        let mut iter = match self.recover(slice) {
1224            Some(offset) => (offset..).zip(slice[offset..].iter()),
1225            None => return None,
1226        };
1227
1228        // Index where next key started
1229        let start_key = match iter.find(|(_, &b)| !is_whitespace(b)) {
1230            // Input: `    key`
1231            //             ^
1232            Some((s, _)) => s,
1233            // Input: `    `
1234            //             ^
1235            None => {
1236                // Because we reach end-of-input, stop iteration on next call
1237                self.state = State::Done;
1238                return None;
1239            }
1240        };
1241        // Span of a key
1242        let (key, offset) = match iter.find(|(_, &b)| b == b'=' || is_whitespace(b)) {
1243            // Input: `    key=`
1244            //             |  ^
1245            //             s  e
1246            Some((e, b'=')) => (start_key..e, e),
1247
1248            // Input: `    key `
1249            //                ^
1250            Some((e, _)) => match iter.find(|(_, &b)| !is_whitespace(b)) {
1251                // Input: `    key  =`
1252                //             |  | ^
1253                //     start_key  e
1254                Some((offset, b'=')) => (start_key..e, offset),
1255                // Input: `    key  x`
1256                //             |  | ^
1257                //     start_key  e
1258                // If HTML-like attributes is allowed, this is the result, otherwise error
1259                Some((offset, _)) => {
1260                    // In any case, recovering is not required
1261                    self.state = State::Next(offset);
1262                    return self.key_only(slice, start_key..e, offset);
1263                }
1264                // Input: `    key  `
1265                //             |  | ^
1266                //     start_key  e
1267                // If HTML-like attributes is allowed, this is the result, otherwise error
1268                None => {
1269                    // Because we reach end-of-input, stop iteration on next call
1270                    self.state = State::Done;
1271                    return self.key_only(slice, start_key..e, slice.len());
1272                }
1273            },
1274
1275            // Input: `    key`
1276            //             |  ^
1277            //             s  e = len()
1278            // If HTML-like attributes is allowed, this is the result, otherwise error
1279            None => {
1280                // Because we reach end-of-input, stop iteration on next call
1281                self.state = State::Done;
1282                let e = slice.len();
1283                return self.key_only(slice, start_key..e, e);
1284            }
1285        };
1286
1287        let key = match self.check_for_duplicates(slice, key) {
1288            Err(e) => {
1289                self.state = State::SkipEqValue(offset);
1290                return Some(Err(e));
1291            }
1292            Ok(key) => key,
1293        };
1294
1295        ////////////////////////////////////////////////////////////////////////
1296
1297        // Gets the position of quote and quote type
1298        let (start_value, quote) = match iter.find(|(_, &b)| !is_whitespace(b)) {
1299            // Input: `    key  =  "`
1300            //                     ^
1301            Some((s, b'"')) => (s + 1, b'"'),
1302            // Input: `    key  =  '`
1303            //                     ^
1304            Some((s, b'\'')) => (s + 1, b'\''),
1305
1306            // Input: `    key  =  x`
1307            //                     ^
1308            // If HTML-like attributes is allowed, this is the start of the value
1309            Some((s, _)) if self.html => {
1310                // We do not check validity of attribute value characters as required
1311                // according to https://html.spec.whatwg.org/#unquoted. It can be done
1312                // during validation phase
1313                let end = match iter.find(|(_, &b)| is_whitespace(b)) {
1314                    // Input: `    key  =  value `
1315                    //                     |    ^
1316                    //                     s    e
1317                    Some((e, _)) => e,
1318                    // Input: `    key  =  value`
1319                    //                     |    ^
1320                    //                     s    e = len()
1321                    None => slice.len(),
1322                };
1323                self.state = State::Next(end);
1324                return Some(Ok(Attr::Unquoted(key, s..end)));
1325            }
1326            // Input: `    key  =  x`
1327            //                     ^
1328            Some((s, _)) => {
1329                self.state = State::SkipValue(s);
1330                return Some(Err(AttrError::UnquotedValue(s)));
1331            }
1332
1333            // Input: `    key  =  `
1334            //                     ^
1335            None => {
1336                // Because we reach end-of-input, stop iteration on next call
1337                self.state = State::Done;
1338                return Some(Err(AttrError::ExpectedValue(slice.len())));
1339            }
1340        };
1341
1342        match iter.find(|(_, &b)| b == quote) {
1343            // Input: `    key  =  "   "`
1344            //                         ^
1345            Some((e, b'"')) => self.double_q(key, start_value..e),
1346            // Input: `    key  =  '   '`
1347            //                         ^
1348            Some((e, _)) => self.single_q(key, start_value..e),
1349
1350            // Input: `    key  =  "   `
1351            // Input: `    key  =  '   `
1352            //                         ^
1353            // Closing quote not found
1354            None => {
1355                // Because we reach end-of-input, stop iteration on next call
1356                self.state = State::Done;
1357                Some(Err(AttrError::ExpectedQuote(slice.len(), quote)))
1358            }
1359        }
1360    }
1361}
1362
1363////////////////////////////////////////////////////////////////////////////////////////////////////
1364
1365/// Checks, how parsing of XML-style attributes works. Each attribute should
1366/// have a value, enclosed in single or double quotes.
1367#[cfg(test)]
1368mod xml {
1369    use super::*;
1370    use pretty_assertions::assert_eq;
1371
1372    mod attribute_value_normalization {
1373        use super::*;
1374        use crate::errors::Error;
1375        use crate::escape::EscapeError::*;
1376        use crate::XmlVersion::*;
1377        use pretty_assertions::assert_eq;
1378
1379        /// Empty values returned are unchanged
1380        #[test]
1381        fn empty() {
1382            let raw_value = "".as_bytes();
1383            let attr = Attribute::from(("foo".as_bytes(), raw_value));
1384
1385            let value = attr
1386                .decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1387                .unwrap();
1388            assert_eq!(value, "");
1389            // assert_eq! does not check if value is borrowed, but this is important
1390            assert!(matches!(value, Cow::Borrowed(_)));
1391
1392            let value = attr
1393                .decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1394                .unwrap();
1395            assert_eq!(value, "");
1396            // assert_eq! does not check if value is borrowed, but this is important
1397            assert!(matches!(value, Cow::Borrowed(_)));
1398
1399            let value = attr
1400                .decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1401                .unwrap();
1402            assert_eq!(value, "");
1403            // assert_eq! does not check if value is borrowed, but this is important
1404            assert!(matches!(value, Cow::Borrowed(_)));
1405        }
1406
1407        /// Already normalized values are returned unchanged
1408        #[test]
1409        fn already_normalized() {
1410            let raw_value = "foobar123".as_bytes();
1411            let attr = Attribute::from(("foo".as_bytes(), raw_value));
1412
1413            let value = attr
1414                .decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1415                .unwrap();
1416            assert_eq!(value, "foobar123");
1417            // assert_eq! does not check if value is borrowed, but this is important
1418            assert!(matches!(value, Cow::Borrowed(_)));
1419
1420            let value = attr
1421                .decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1422                .unwrap();
1423            assert_eq!(value, "foobar123");
1424            // assert_eq! does not check if value is borrowed, but this is important
1425            assert!(matches!(value, Cow::Borrowed(_)));
1426
1427            let value = attr
1428                .decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1429                .unwrap();
1430            assert_eq!(value, "foobar123");
1431            // assert_eq! does not check if value is borrowed, but this is important
1432            assert!(matches!(value, Cow::Borrowed(_)));
1433        }
1434
1435        /// Return, tab, and newline characters (0xD, 0x9, 0xA) must be substituted with
1436        /// a space character, \r\n and \r\u{85} should be replaced by one space in 1.1
1437        #[test]
1438        fn space_replacement() {
1439            let raw_value = "\r\nfoo\u{85}\u{2028}\rbar\tbaz\n\ndelta\n\r\u{85}".as_bytes();
1440            let attr = Attribute::from(("foo".as_bytes(), raw_value));
1441
1442            assert_eq!(
1443                attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1444                    .unwrap(),
1445                " foo\u{85}\u{2028} bar baz  delta  \u{85}"
1446            );
1447            assert_eq!(
1448                attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1449                    .unwrap(),
1450                " foo\u{85}\u{2028} bar baz  delta  \u{85}"
1451            );
1452            assert_eq!(
1453                attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1454                    .unwrap(),
1455                " foo   bar baz  delta  "
1456            );
1457        }
1458
1459        /// Entities must be terminated
1460        #[test]
1461        fn unterminated_entity() {
1462            let raw_value = "abc&quotdef".as_bytes();
1463            let attr = Attribute::from(("foo".as_bytes(), raw_value));
1464
1465            match attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8()) {
1466                Err(Error::Escape(err)) => assert_eq!(err, UnterminatedEntity(3..11)),
1467                x => panic!("Expected Err(Escape(_)), got {:?}", x),
1468            }
1469
1470            match attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8()) {
1471                Err(Error::Escape(err)) => assert_eq!(err, UnterminatedEntity(3..11)),
1472                x => panic!("Expected Err(Escape(_)), got {:?}", x),
1473            }
1474
1475            match attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8()) {
1476                Err(Error::Escape(err)) => assert_eq!(err, UnterminatedEntity(3..11)),
1477                x => panic!("Expected Err(Escape(_)), got {:?}", x),
1478            }
1479        }
1480
1481        /// Unknown entities raise error
1482        #[test]
1483        fn unrecognized_entity() {
1484            let raw_value = "abc&unkn;def".as_bytes();
1485            let attr = Attribute::from(("foo".as_bytes(), raw_value));
1486
1487            match attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8()) {
1488                // TODO: is this divergence between range behavior of UnterminatedEntity
1489                // and UnrecognizedEntity appropriate? existing unescape code behaves the same.  (see: start index)
1490                Err(Error::Escape(err)) => {
1491                    assert_eq!(err, UnrecognizedEntity(4..8, "unkn".to_owned()))
1492                }
1493                x => panic!("Expected Err(Escape(err)), got {:?}", x),
1494            }
1495            match attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8()) {
1496                // TODO: is this divergence between range behavior of UnterminatedEntity
1497                // and UnrecognizedEntity appropriate? existing unescape code behaves the same.  (see: start index)
1498                Err(Error::Escape(err)) => {
1499                    assert_eq!(err, UnrecognizedEntity(4..8, "unkn".to_owned()))
1500                }
1501                x => panic!("Expected Err(Escape(err)), got {:?}", x),
1502            }
1503            match attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8()) {
1504                // TODO: is this divergence between range behavior of UnterminatedEntity
1505                // and UnrecognizedEntity appropriate? existing unescape code behaves the same.  (see: start index)
1506                Err(Error::Escape(err)) => {
1507                    assert_eq!(err, UnrecognizedEntity(4..8, "unkn".to_owned()))
1508                }
1509                x => panic!("Expected Err(Escape(err)), got {:?}", x),
1510            }
1511        }
1512
1513        /// custom entity replacement works, entity replacement text processed recursively
1514        #[test]
1515        fn entity_replacement() {
1516            let raw_value = "&d;&d;A&a;&#x20;&a;B&da;".as_bytes();
1517            let attr = Attribute::from(("foo".as_bytes(), raw_value));
1518            fn custom_resolver(ent: &str) -> Option<&'static str> {
1519                match ent {
1520                    "d" => Some("&#xD;"),
1521                    "a" => Some("&#xA;"),
1522                    "da" => Some("&#xD;&#xA;"),
1523                    _ => None,
1524                }
1525            }
1526
1527            assert_eq!(
1528                attr.decoded_and_normalized_value_with(
1529                    Implicit1_0,
1530                    Decoder::utf8(),
1531                    5,
1532                    &custom_resolver
1533                )
1534                .unwrap(),
1535                "\r\rA\n \nB\r\n"
1536            );
1537            assert_eq!(
1538                attr.decoded_and_normalized_value_with(
1539                    Explicit1_0,
1540                    Decoder::utf8(),
1541                    5,
1542                    &custom_resolver
1543                )
1544                .unwrap(),
1545                "\r\rA\n \nB\r\n"
1546            );
1547            assert_eq!(
1548                attr.decoded_and_normalized_value_with(
1549                    Explicit1_1,
1550                    Decoder::utf8(),
1551                    5,
1552                    &custom_resolver
1553                )
1554                .unwrap(),
1555                "\r\rA\n \nB\r\n"
1556            );
1557        }
1558
1559        #[test]
1560        fn char_references() {
1561            // character literal references are substituted without being replaced by spaces
1562            let raw_value = "&#xd;&#xd;A&#xa;&#xa;B&#xd;&#xa;".as_bytes();
1563            let attr = Attribute::from(("foo".as_bytes(), raw_value));
1564
1565            assert_eq!(
1566                attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1567                    .unwrap(),
1568                "\r\rA\n\nB\r\n"
1569            );
1570            assert_eq!(
1571                attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1572                    .unwrap(),
1573                "\r\rA\n\nB\r\n"
1574            );
1575            assert_eq!(
1576                attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1577                    .unwrap(),
1578                "\r\rA\n\nB\r\n"
1579            );
1580        }
1581    }
1582
1583    /// Checked attribute is the single attribute
1584    mod single {
1585        use super::*;
1586        use pretty_assertions::assert_eq;
1587
1588        /// Attribute have a value enclosed in single quotes
1589        #[test]
1590        fn single_quoted() {
1591            let mut iter = Attributes::new(r#"tag key='value'"#, 3);
1592
1593            assert_eq!(
1594                iter.next(),
1595                Some(Ok(Attribute {
1596                    key: QName(b"key"),
1597                    value: Cow::Borrowed(b"value"),
1598                }))
1599            );
1600            assert_eq!(iter.next(), None);
1601            assert_eq!(iter.next(), None);
1602        }
1603
1604        /// Attribute have a value enclosed in double quotes
1605        #[test]
1606        fn double_quoted() {
1607            let mut iter = Attributes::new(r#"tag key="value""#, 3);
1608
1609            assert_eq!(
1610                iter.next(),
1611                Some(Ok(Attribute {
1612                    key: QName(b"key"),
1613                    value: Cow::Borrowed(b"value"),
1614                }))
1615            );
1616            assert_eq!(iter.next(), None);
1617            assert_eq!(iter.next(), None);
1618        }
1619
1620        /// Attribute have a value, not enclosed in quotes
1621        #[test]
1622        fn unquoted() {
1623            let mut iter = Attributes::new(r#"tag key=value"#, 3);
1624            //                                0       ^ = 8
1625
1626            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
1627            assert_eq!(iter.next(), None);
1628            assert_eq!(iter.next(), None);
1629        }
1630
1631        /// Only attribute key is present
1632        #[test]
1633        fn key_only() {
1634            let mut iter = Attributes::new(r#"tag key"#, 3);
1635            //                                0      ^ = 7
1636
1637            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(7))));
1638            assert_eq!(iter.next(), None);
1639            assert_eq!(iter.next(), None);
1640        }
1641
1642        /// Key is started with an invalid symbol (a single quote in this test).
1643        /// Because we do not check validity of keys and values during parsing,
1644        /// that invalid attribute will be returned
1645        #[test]
1646        fn key_start_invalid() {
1647            let mut iter = Attributes::new(r#"tag 'key'='value'"#, 3);
1648
1649            assert_eq!(
1650                iter.next(),
1651                Some(Ok(Attribute {
1652                    key: QName(b"'key'"),
1653                    value: Cow::Borrowed(b"value"),
1654                }))
1655            );
1656            assert_eq!(iter.next(), None);
1657            assert_eq!(iter.next(), None);
1658        }
1659
1660        /// Key contains an invalid symbol (an ampersand in this test).
1661        /// Because we do not check validity of keys and values during parsing,
1662        /// that invalid attribute will be returned
1663        #[test]
1664        fn key_contains_invalid() {
1665            let mut iter = Attributes::new(r#"tag key&jey='value'"#, 3);
1666
1667            assert_eq!(
1668                iter.next(),
1669                Some(Ok(Attribute {
1670                    key: QName(b"key&jey"),
1671                    value: Cow::Borrowed(b"value"),
1672                }))
1673            );
1674            assert_eq!(iter.next(), None);
1675            assert_eq!(iter.next(), None);
1676        }
1677
1678        /// Attribute value is missing after `=`
1679        #[test]
1680        fn missed_value() {
1681            let mut iter = Attributes::new(r#"tag key="#, 3);
1682            //                                0       ^ = 8
1683
1684            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8))));
1685            assert_eq!(iter.next(), None);
1686            assert_eq!(iter.next(), None);
1687        }
1688    }
1689
1690    /// Checked attribute is the first attribute in the list of many attributes
1691    mod first {
1692        use super::*;
1693        use pretty_assertions::assert_eq;
1694
1695        /// Attribute have a value enclosed in single quotes
1696        #[test]
1697        fn single_quoted() {
1698            let mut iter = Attributes::new(r#"tag key='value' regular='attribute'"#, 3);
1699
1700            assert_eq!(
1701                iter.next(),
1702                Some(Ok(Attribute {
1703                    key: QName(b"key"),
1704                    value: Cow::Borrowed(b"value"),
1705                }))
1706            );
1707            assert_eq!(
1708                iter.next(),
1709                Some(Ok(Attribute {
1710                    key: QName(b"regular"),
1711                    value: Cow::Borrowed(b"attribute"),
1712                }))
1713            );
1714            assert_eq!(iter.next(), None);
1715            assert_eq!(iter.next(), None);
1716        }
1717
1718        /// Attribute have a value enclosed in double quotes
1719        #[test]
1720        fn double_quoted() {
1721            let mut iter = Attributes::new(r#"tag key="value" regular='attribute'"#, 3);
1722
1723            assert_eq!(
1724                iter.next(),
1725                Some(Ok(Attribute {
1726                    key: QName(b"key"),
1727                    value: Cow::Borrowed(b"value"),
1728                }))
1729            );
1730            assert_eq!(
1731                iter.next(),
1732                Some(Ok(Attribute {
1733                    key: QName(b"regular"),
1734                    value: Cow::Borrowed(b"attribute"),
1735                }))
1736            );
1737            assert_eq!(iter.next(), None);
1738            assert_eq!(iter.next(), None);
1739        }
1740
1741        /// Attribute have a value, not enclosed in quotes
1742        #[test]
1743        fn unquoted() {
1744            let mut iter = Attributes::new(r#"tag key=value regular='attribute'"#, 3);
1745            //                                0       ^ = 8
1746
1747            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
1748            // check error recovery
1749            assert_eq!(
1750                iter.next(),
1751                Some(Ok(Attribute {
1752                    key: QName(b"regular"),
1753                    value: Cow::Borrowed(b"attribute"),
1754                }))
1755            );
1756            assert_eq!(iter.next(), None);
1757            assert_eq!(iter.next(), None);
1758        }
1759
1760        /// Only attribute key is present
1761        #[test]
1762        fn key_only() {
1763            let mut iter = Attributes::new(r#"tag key regular='attribute'"#, 3);
1764            //                                0       ^ = 8
1765
1766            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
1767            // check error recovery
1768            assert_eq!(
1769                iter.next(),
1770                Some(Ok(Attribute {
1771                    key: QName(b"regular"),
1772                    value: Cow::Borrowed(b"attribute"),
1773                }))
1774            );
1775            assert_eq!(iter.next(), None);
1776            assert_eq!(iter.next(), None);
1777        }
1778
1779        /// Key is started with an invalid symbol (a single quote in this test).
1780        /// Because we do not check validity of keys and values during parsing,
1781        /// that invalid attribute will be returned
1782        #[test]
1783        fn key_start_invalid() {
1784            let mut iter = Attributes::new(r#"tag 'key'='value' regular='attribute'"#, 3);
1785
1786            assert_eq!(
1787                iter.next(),
1788                Some(Ok(Attribute {
1789                    key: QName(b"'key'"),
1790                    value: Cow::Borrowed(b"value"),
1791                }))
1792            );
1793            assert_eq!(
1794                iter.next(),
1795                Some(Ok(Attribute {
1796                    key: QName(b"regular"),
1797                    value: Cow::Borrowed(b"attribute"),
1798                }))
1799            );
1800            assert_eq!(iter.next(), None);
1801            assert_eq!(iter.next(), None);
1802        }
1803
1804        /// Key contains an invalid symbol (an ampersand in this test).
1805        /// Because we do not check validity of keys and values during parsing,
1806        /// that invalid attribute will be returned
1807        #[test]
1808        fn key_contains_invalid() {
1809            let mut iter = Attributes::new(r#"tag key&jey='value' regular='attribute'"#, 3);
1810
1811            assert_eq!(
1812                iter.next(),
1813                Some(Ok(Attribute {
1814                    key: QName(b"key&jey"),
1815                    value: Cow::Borrowed(b"value"),
1816                }))
1817            );
1818            assert_eq!(
1819                iter.next(),
1820                Some(Ok(Attribute {
1821                    key: QName(b"regular"),
1822                    value: Cow::Borrowed(b"attribute"),
1823                }))
1824            );
1825            assert_eq!(iter.next(), None);
1826            assert_eq!(iter.next(), None);
1827        }
1828
1829        /// Attribute value is missing after `=`.
1830        #[test]
1831        fn missed_value() {
1832            let mut iter = Attributes::new(r#"tag key= regular='attribute'"#, 3);
1833            //                                0        ^ = 9
1834
1835            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1836            // Because we do not check validity of keys and values during parsing,
1837            // "error='recovery'" is considered, as unquoted attribute value and
1838            // skipped during recovery and iteration finished
1839            assert_eq!(iter.next(), None);
1840            assert_eq!(iter.next(), None);
1841
1842            ////////////////////////////////////////////////////////////////////
1843
1844            let mut iter = Attributes::new(r#"tag key= regular= 'attribute'"#, 3);
1845            //                                0        ^ = 9               ^ = 29
1846
1847            // In that case "regular=" considered as unquoted value
1848            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1849            // In that case "'attribute'" considered as a key, because we do not check
1850            // validity of key names
1851            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
1852            assert_eq!(iter.next(), None);
1853            assert_eq!(iter.next(), None);
1854
1855            ////////////////////////////////////////////////////////////////////
1856
1857            let mut iter = Attributes::new(r#"tag key= regular ='attribute'"#, 3);
1858            //                                0        ^ = 9               ^ = 29
1859
1860            // In that case "regular" considered as unquoted value
1861            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1862            // In that case "='attribute'" considered as a key, because we do not check
1863            // validity of key names
1864            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
1865            assert_eq!(iter.next(), None);
1866            assert_eq!(iter.next(), None);
1867
1868            ////////////////////////////////////////////////////////////////////
1869
1870            let mut iter = Attributes::new(r#"tag key= regular = 'attribute'"#, 3);
1871            //                                0        ^ = 9     ^ = 19     ^ = 30
1872
1873            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1874            // In that case second "=" considered as a key, because we do not check
1875            // validity of key names
1876            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(19))));
1877            // In that case "'attribute'" considered as a key, because we do not check
1878            // validity of key names
1879            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(30))));
1880            assert_eq!(iter.next(), None);
1881            assert_eq!(iter.next(), None);
1882        }
1883    }
1884
1885    /// Copy of single, but with additional spaces in markup
1886    mod sparsed {
1887        use super::*;
1888        use pretty_assertions::assert_eq;
1889
1890        /// Attribute have a value enclosed in single quotes
1891        #[test]
1892        fn single_quoted() {
1893            let mut iter = Attributes::new(r#"tag key = 'value' "#, 3);
1894
1895            assert_eq!(
1896                iter.next(),
1897                Some(Ok(Attribute {
1898                    key: QName(b"key"),
1899                    value: Cow::Borrowed(b"value"),
1900                }))
1901            );
1902            assert_eq!(iter.next(), None);
1903            assert_eq!(iter.next(), None);
1904        }
1905
1906        /// Attribute have a value enclosed in double quotes
1907        #[test]
1908        fn double_quoted() {
1909            let mut iter = Attributes::new(r#"tag key = "value" "#, 3);
1910
1911            assert_eq!(
1912                iter.next(),
1913                Some(Ok(Attribute {
1914                    key: QName(b"key"),
1915                    value: Cow::Borrowed(b"value"),
1916                }))
1917            );
1918            assert_eq!(iter.next(), None);
1919            assert_eq!(iter.next(), None);
1920        }
1921
1922        /// Attribute have a value, not enclosed in quotes
1923        #[test]
1924        fn unquoted() {
1925            let mut iter = Attributes::new(r#"tag key = value "#, 3);
1926            //                                0         ^ = 10
1927
1928            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(10))));
1929            assert_eq!(iter.next(), None);
1930            assert_eq!(iter.next(), None);
1931        }
1932
1933        /// Only attribute key is present
1934        #[test]
1935        fn key_only() {
1936            let mut iter = Attributes::new(r#"tag key "#, 3);
1937            //                                0       ^ = 8
1938
1939            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
1940            assert_eq!(iter.next(), None);
1941            assert_eq!(iter.next(), None);
1942        }
1943
1944        /// Key is started with an invalid symbol (a single quote in this test).
1945        /// Because we do not check validity of keys and values during parsing,
1946        /// that invalid attribute will be returned
1947        #[test]
1948        fn key_start_invalid() {
1949            let mut iter = Attributes::new(r#"tag 'key' = 'value' "#, 3);
1950
1951            assert_eq!(
1952                iter.next(),
1953                Some(Ok(Attribute {
1954                    key: QName(b"'key'"),
1955                    value: Cow::Borrowed(b"value"),
1956                }))
1957            );
1958            assert_eq!(iter.next(), None);
1959            assert_eq!(iter.next(), None);
1960        }
1961
1962        /// Key contains an invalid symbol (an ampersand in this test).
1963        /// Because we do not check validity of keys and values during parsing,
1964        /// that invalid attribute will be returned
1965        #[test]
1966        fn key_contains_invalid() {
1967            let mut iter = Attributes::new(r#"tag key&jey = 'value' "#, 3);
1968
1969            assert_eq!(
1970                iter.next(),
1971                Some(Ok(Attribute {
1972                    key: QName(b"key&jey"),
1973                    value: Cow::Borrowed(b"value"),
1974                }))
1975            );
1976            assert_eq!(iter.next(), None);
1977            assert_eq!(iter.next(), None);
1978        }
1979
1980        /// Attribute value is missing after `=`
1981        #[test]
1982        fn missed_value() {
1983            let mut iter = Attributes::new(r#"tag key = "#, 3);
1984            //                                0         ^ = 10
1985
1986            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10))));
1987            assert_eq!(iter.next(), None);
1988            assert_eq!(iter.next(), None);
1989        }
1990    }
1991
1992    /// Checks that duplicated attributes correctly reported and recovering is
1993    /// possible after that
1994    mod duplicated {
1995        use super::*;
1996
1997        mod with_check {
1998            use super::*;
1999            use pretty_assertions::assert_eq;
2000
2001            /// Attribute have a value enclosed in single quotes
2002            #[test]
2003            fn single_quoted() {
2004                let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
2005                //                                0   ^ = 4       ^ = 16
2006
2007                assert_eq!(
2008                    iter.next(),
2009                    Some(Ok(Attribute {
2010                        key: QName(b"key"),
2011                        value: Cow::Borrowed(b"value"),
2012                    }))
2013                );
2014                assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2015                assert_eq!(
2016                    iter.next(),
2017                    Some(Ok(Attribute {
2018                        key: QName(b"another"),
2019                        value: Cow::Borrowed(b""),
2020                    }))
2021                );
2022                assert_eq!(iter.next(), None);
2023                assert_eq!(iter.next(), None);
2024            }
2025
2026            /// Attribute have a value enclosed in double quotes
2027            #[test]
2028            fn double_quoted() {
2029                let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
2030                //                                0   ^ = 4       ^ = 16
2031
2032                assert_eq!(
2033                    iter.next(),
2034                    Some(Ok(Attribute {
2035                        key: QName(b"key"),
2036                        value: Cow::Borrowed(b"value"),
2037                    }))
2038                );
2039                assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2040                assert_eq!(
2041                    iter.next(),
2042                    Some(Ok(Attribute {
2043                        key: QName(b"another"),
2044                        value: Cow::Borrowed(b""),
2045                    }))
2046                );
2047                assert_eq!(iter.next(), None);
2048                assert_eq!(iter.next(), None);
2049            }
2050
2051            /// Attribute have a value, not enclosed in quotes
2052            #[test]
2053            fn unquoted() {
2054                let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
2055                //                                0   ^ = 4       ^ = 16
2056
2057                assert_eq!(
2058                    iter.next(),
2059                    Some(Ok(Attribute {
2060                        key: QName(b"key"),
2061                        value: Cow::Borrowed(b"value"),
2062                    }))
2063                );
2064                assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2065                assert_eq!(
2066                    iter.next(),
2067                    Some(Ok(Attribute {
2068                        key: QName(b"another"),
2069                        value: Cow::Borrowed(b""),
2070                    }))
2071                );
2072                assert_eq!(iter.next(), None);
2073                assert_eq!(iter.next(), None);
2074            }
2075
2076            /// Only attribute key is present
2077            #[test]
2078            fn key_only() {
2079                let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
2080                //                                0                   ^ = 20
2081
2082                assert_eq!(
2083                    iter.next(),
2084                    Some(Ok(Attribute {
2085                        key: QName(b"key"),
2086                        value: Cow::Borrowed(b"value"),
2087                    }))
2088                );
2089                assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
2090                assert_eq!(
2091                    iter.next(),
2092                    Some(Ok(Attribute {
2093                        key: QName(b"another"),
2094                        value: Cow::Borrowed(b""),
2095                    }))
2096                );
2097                assert_eq!(iter.next(), None);
2098                assert_eq!(iter.next(), None);
2099            }
2100
2101            /// Once a start tag declares more than `SMALL_ATTRIBUTE_COUNT`
2102            /// attributes the duplicate check switches to its hash-based path. A
2103            /// duplicate of a name first seen during the earlier linear phase must
2104            /// still be detected, with the original position reported. Regression
2105            /// cover for the cold path of [#969].
2106            ///
2107            /// [#969]: https://github.com/tafia/quick-xml/issues/969
2108            #[test]
2109            fn duplicate_past_hash_threshold() {
2110                let dup = SMALL_ATTRIBUTE_COUNT / 2;
2111                let n = SMALL_ATTRIBUTE_COUNT + 8;
2112
2113                let mut source = String::from("tag");
2114                let mut positions = Vec::with_capacity(n);
2115                for i in 0..n {
2116                    source.push(' ');
2117                    positions.push(source.len());
2118                    source.push_str(&format!("k{:04}=''", i));
2119                }
2120                // Repeat the name first seen at `positions[dup]` (linear phase).
2121                source.push(' ');
2122                let dup_pos = source.len();
2123                source.push_str(&format!("k{:04}=''", dup));
2124
2125                let mut iter = Attributes::new(&source, 3);
2126                for _ in 0..n {
2127                    assert!(matches!(iter.next(), Some(Ok(_))));
2128                }
2129                assert_eq!(
2130                    iter.next(),
2131                    Some(Err(AttrError::Duplicated(dup_pos, positions[dup])))
2132                );
2133            }
2134        }
2135
2136        /// Check for duplicated names is disabled
2137        mod without_check {
2138            use super::*;
2139            use pretty_assertions::assert_eq;
2140
2141            /// Attribute have a value enclosed in single quotes
2142            #[test]
2143            fn single_quoted() {
2144                let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
2145                iter.with_checks(false);
2146
2147                assert_eq!(
2148                    iter.next(),
2149                    Some(Ok(Attribute {
2150                        key: QName(b"key"),
2151                        value: Cow::Borrowed(b"value"),
2152                    }))
2153                );
2154                assert_eq!(
2155                    iter.next(),
2156                    Some(Ok(Attribute {
2157                        key: QName(b"key"),
2158                        value: Cow::Borrowed(b"dup"),
2159                    }))
2160                );
2161                assert_eq!(
2162                    iter.next(),
2163                    Some(Ok(Attribute {
2164                        key: QName(b"another"),
2165                        value: Cow::Borrowed(b""),
2166                    }))
2167                );
2168                assert_eq!(iter.next(), None);
2169                assert_eq!(iter.next(), None);
2170            }
2171
2172            /// Attribute have a value enclosed in double quotes
2173            #[test]
2174            fn double_quoted() {
2175                let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
2176                iter.with_checks(false);
2177
2178                assert_eq!(
2179                    iter.next(),
2180                    Some(Ok(Attribute {
2181                        key: QName(b"key"),
2182                        value: Cow::Borrowed(b"value"),
2183                    }))
2184                );
2185                assert_eq!(
2186                    iter.next(),
2187                    Some(Ok(Attribute {
2188                        key: QName(b"key"),
2189                        value: Cow::Borrowed(b"dup"),
2190                    }))
2191                );
2192                assert_eq!(
2193                    iter.next(),
2194                    Some(Ok(Attribute {
2195                        key: QName(b"another"),
2196                        value: Cow::Borrowed(b""),
2197                    }))
2198                );
2199                assert_eq!(iter.next(), None);
2200                assert_eq!(iter.next(), None);
2201            }
2202
2203            /// Attribute have a value, not enclosed in quotes
2204            #[test]
2205            fn unquoted() {
2206                let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
2207                //                                0                   ^ = 20
2208                iter.with_checks(false);
2209
2210                assert_eq!(
2211                    iter.next(),
2212                    Some(Ok(Attribute {
2213                        key: QName(b"key"),
2214                        value: Cow::Borrowed(b"value"),
2215                    }))
2216                );
2217                assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(20))));
2218                assert_eq!(
2219                    iter.next(),
2220                    Some(Ok(Attribute {
2221                        key: QName(b"another"),
2222                        value: Cow::Borrowed(b""),
2223                    }))
2224                );
2225                assert_eq!(iter.next(), None);
2226                assert_eq!(iter.next(), None);
2227            }
2228
2229            /// Only attribute key is present
2230            #[test]
2231            fn key_only() {
2232                let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
2233                //                                0                   ^ = 20
2234                iter.with_checks(false);
2235
2236                assert_eq!(
2237                    iter.next(),
2238                    Some(Ok(Attribute {
2239                        key: QName(b"key"),
2240                        value: Cow::Borrowed(b"value"),
2241                    }))
2242                );
2243                assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
2244                assert_eq!(
2245                    iter.next(),
2246                    Some(Ok(Attribute {
2247                        key: QName(b"another"),
2248                        value: Cow::Borrowed(b""),
2249                    }))
2250                );
2251                assert_eq!(iter.next(), None);
2252                assert_eq!(iter.next(), None);
2253            }
2254        }
2255    }
2256
2257    #[test]
2258    fn mixed_quote() {
2259        let mut iter = Attributes::new(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);
2260
2261        assert_eq!(
2262            iter.next(),
2263            Some(Ok(Attribute {
2264                key: QName(b"a"),
2265                value: Cow::Borrowed(b"a"),
2266            }))
2267        );
2268        assert_eq!(
2269            iter.next(),
2270            Some(Ok(Attribute {
2271                key: QName(b"b"),
2272                value: Cow::Borrowed(b"b"),
2273            }))
2274        );
2275        assert_eq!(
2276            iter.next(),
2277            Some(Ok(Attribute {
2278                key: QName(b"c"),
2279                value: Cow::Borrowed(br#"cc"cc"#),
2280            }))
2281        );
2282        assert_eq!(
2283            iter.next(),
2284            Some(Ok(Attribute {
2285                key: QName(b"d"),
2286                value: Cow::Borrowed(b"dd'dd"),
2287            }))
2288        );
2289        assert_eq!(iter.next(), None);
2290        assert_eq!(iter.next(), None);
2291    }
2292}
2293
/// Checks how parsing of HTML-style attributes works. Each attribute can be
/// in one of three forms:
/// - XML-like: has a value enclosed in single or double quotes
/// - has a value that is not enclosed in quotes
/// - has no value, only a key
#[cfg(test)]
mod html {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Builds the attribute a test expects the iterator to yield. Both the key
    /// and the raw value are borrowed exactly as they are passed in
    fn attr<'a>(key: &'a [u8], value: &'a [u8]) -> Attribute<'a> {
        Attribute {
            key: QName(key),
            value: Cow::Borrowed(value),
        }
    }

    /// The attribute under test is the only attribute of the tag
    mod single {
        use super::*;
        use pretty_assertions::assert_eq;

        /// The value is enclosed in single quotes
        #[test]
        fn single_quoted() {
            let mut attrs = Attributes::html(r#"tag key='value'"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is enclosed in double quotes
        #[test]
        fn double_quoted() {
            let mut attrs = Attributes::html(r#"tag key="value""#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is not enclosed in quotes
        #[test]
        fn unquoted() {
            let mut attrs = Attributes::html(r#"tag key=value"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// Only the key is present; the yielded value is empty
        #[test]
        fn key_only() {
            let mut attrs = Attributes::html(r#"tag key"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", &[]))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key starts with an invalid symbol (a single quote in this test).
        /// Keys and values are not validated during parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_start_invalid() {
            let mut attrs = Attributes::html(r#"tag 'key'='value'"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"'key'", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key contains an invalid symbol (an ampersand in this test).
        /// Keys and values are not validated during parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_contains_invalid() {
            let mut attrs = Attributes::html(r#"tag key&jey='value'"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key&jey", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is missing after `=`
        #[test]
        fn missed_value() {
            let mut attrs = Attributes::html(r#"tag key="#, 3);
            //                                  0       ^ = 8

            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedValue(8))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }
    }

    /// The attribute under test is the first one in a list of several attributes
    mod first {
        use super::*;
        use pretty_assertions::assert_eq;

        /// The value is enclosed in single quotes
        #[test]
        fn single_quoted() {
            let mut attrs = Attributes::html(r#"tag key='value' regular='attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
            assert_eq!(attrs.next(), Some(Ok(attr(b"regular", b"attribute"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is enclosed in double quotes
        #[test]
        fn double_quoted() {
            let mut attrs = Attributes::html(r#"tag key="value" regular='attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
            assert_eq!(attrs.next(), Some(Ok(attr(b"regular", b"attribute"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is not enclosed in quotes
        #[test]
        fn unquoted() {
            let mut attrs = Attributes::html(r#"tag key=value regular='attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
            assert_eq!(attrs.next(), Some(Ok(attr(b"regular", b"attribute"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// Only the key is present; the yielded value is empty
        #[test]
        fn key_only() {
            let mut attrs = Attributes::html(r#"tag key regular='attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", &[]))));
            assert_eq!(attrs.next(), Some(Ok(attr(b"regular", b"attribute"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key starts with an invalid symbol (a single quote in this test).
        /// Keys and values are not validated during parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_start_invalid() {
            let mut attrs = Attributes::html(r#"tag 'key'='value' regular='attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"'key'", b"value"))));
            assert_eq!(attrs.next(), Some(Ok(attr(b"regular", b"attribute"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key contains an invalid symbol (an ampersand in this test).
        /// Keys and values are not validated during parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_contains_invalid() {
            let mut attrs = Attributes::html(r#"tag key&jey='value' regular='attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key&jey", b"value"))));
            assert_eq!(attrs.next(), Some(Ok(attr(b"regular", b"attribute"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is missing after `=`. Keys and values are not validated
        /// during parsing, so the text that follows is split into attributes
        /// in ways that depend only on where the spaces are
        #[test]
        fn missed_value() {
            let mut attrs = Attributes::html(r#"tag key= regular='attribute'"#, 3);

            // "regular='attribute'" is taken as the unquoted value of `key`
            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"regular='attribute'"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);

            ////////////////////////////////////////////////////////////////////

            let mut attrs = Attributes::html(r#"tag key= regular= 'attribute'"#, 3);

            // "regular=" is taken as the unquoted value of `key`
            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"regular="))));
            // "'attribute'" is taken as a key-only attribute
            assert_eq!(attrs.next(), Some(Ok(attr(b"'attribute'", &[]))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);

            ////////////////////////////////////////////////////////////////////

            let mut attrs = Attributes::html(r#"tag key= regular ='attribute'"#, 3);

            // "regular" is taken as the unquoted value of `key`
            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"regular"))));
            // "='attribute'" is taken as a key-only attribute
            assert_eq!(attrs.next(), Some(Ok(attr(b"='attribute'", &[]))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);

            ////////////////////////////////////////////////////////////////////

            let mut attrs = Attributes::html(r#"tag key= regular = 'attribute'"#, 3);
            //                                  0        ^ = 9     ^ = 19     ^ = 30

            // "regular" is taken as the unquoted value of `key`
            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"regular"))));
            // "=" is taken as a key-only attribute
            assert_eq!(attrs.next(), Some(Ok(attr(b"=", &[]))));
            // "'attribute'" is taken as a key-only attribute
            assert_eq!(attrs.next(), Some(Ok(attr(b"'attribute'", &[]))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }
    }

    /// Same cases as in `single`, but with additional spaces in the markup
    mod sparsed {
        use super::*;
        use pretty_assertions::assert_eq;

        /// The value is enclosed in single quotes
        #[test]
        fn single_quoted() {
            let mut attrs = Attributes::html(r#"tag key = 'value' "#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is enclosed in double quotes
        #[test]
        fn double_quoted() {
            let mut attrs = Attributes::html(r#"tag key = "value" "#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is not enclosed in quotes
        #[test]
        fn unquoted() {
            let mut attrs = Attributes::html(r#"tag key = value "#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// Only the key is present; the yielded value is empty
        #[test]
        fn key_only() {
            let mut attrs = Attributes::html(r#"tag key "#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key", &[]))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key starts with an invalid symbol (a single quote in this test).
        /// Keys and values are not validated during parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_start_invalid() {
            let mut attrs = Attributes::html(r#"tag 'key' = 'value' "#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"'key'", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key contains an invalid symbol (an ampersand in this test).
        /// Keys and values are not validated during parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_contains_invalid() {
            let mut attrs = Attributes::html(r#"tag key&jey = 'value' "#, 3);

            assert_eq!(attrs.next(), Some(Ok(attr(b"key&jey", b"value"))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is missing after `=`
        #[test]
        fn missed_value() {
            let mut attrs = Attributes::html(r#"tag key = "#, 3);
            //                                  0         ^ = 10

            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedValue(10))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }
    }

    /// Checks that duplicated attributes are reported correctly and that
    /// iteration can continue after such a report
    mod duplicated {
        use super::*;

        /// The check for duplicated names is left enabled
        mod with_check {
            use super::*;
            use pretty_assertions::assert_eq;

            /// The duplicate has a value enclosed in single quotes
            #[test]
            fn single_quoted() {
                let mut attrs = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
                //                                  0   ^ = 4       ^ = 16

                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
                assert_eq!(attrs.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"another", b""))));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate has a value enclosed in double quotes
            #[test]
            fn double_quoted() {
                let mut attrs = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
                //                                  0   ^ = 4       ^ = 16

                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
                assert_eq!(attrs.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"another", b""))));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate has a value that is not enclosed in quotes
            #[test]
            fn unquoted() {
                let mut attrs = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
                //                                  0   ^ = 4       ^ = 16

                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
                assert_eq!(attrs.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"another", b""))));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate consists of a key only
            #[test]
            fn key_only() {
                let mut attrs = Attributes::html(r#"tag key='value' key another=''"#, 3);
                //                                  0   ^ = 4       ^ = 16

                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
                assert_eq!(attrs.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"another", b""))));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }
        }

        /// The check for duplicated names is disabled
        mod without_check {
            use super::*;
            use pretty_assertions::assert_eq;

            /// The duplicate has a value enclosed in single quotes
            #[test]
            fn single_quoted() {
                let mut attrs = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
                attrs.with_checks(false);

                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"dup"))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"another", b""))));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate has a value enclosed in double quotes
            #[test]
            fn double_quoted() {
                let mut attrs = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
                attrs.with_checks(false);

                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"dup"))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"another", b""))));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate has a value that is not enclosed in quotes
            #[test]
            fn unquoted() {
                let mut attrs = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
                attrs.with_checks(false);

                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"dup"))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"another", b""))));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate consists of a key only
            #[test]
            fn key_only() {
                let mut attrs = Attributes::html(r#"tag key='value' key another=''"#, 3);
                attrs.with_checks(false);

                assert_eq!(attrs.next(), Some(Ok(attr(b"key", b"value"))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"key", &[]))));
                assert_eq!(attrs.next(), Some(Ok(attr(b"another", b""))));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }
        }
    }

    /// Single- and double-quoted values are mixed within one tag; a value may
    /// contain the quote character that is not used as its delimiter
    #[test]
    fn mixed_quote() {
        let mut attrs = Attributes::html(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);

        assert_eq!(attrs.next(), Some(Ok(attr(b"a", b"a"))));
        assert_eq!(attrs.next(), Some(Ok(attr(b"b", b"b"))));
        assert_eq!(attrs.next(), Some(Ok(attr(b"c", br#"cc"cc"#))));
        assert_eq!(attrs.next(), Some(Ok(attr(b"d", b"dd'dd"))));
        assert_eq!(attrs.next(), None);
        assert_eq!(attrs.next(), None);
    }
}