// quick_xml/events/attributes.rs
1//! Xml Attributes module
2//!
3//! Provides an iterator over attributes key/value pairs
4
5use crate::encoding::Decoder;
6use crate::errors::Result as XmlResult;
7use crate::escape::{escape, resolve_predefined_entity};
8use crate::name::{LocalName, Namespace, NamespaceResolver, QName};
9use crate::utils::{is_whitespace, Bytes};
10use crate::XmlVersion;
11
12use std::collections::HashSet;
13use std::fmt::{self, Debug, Display, Formatter};
14use std::hash::{BuildHasherDefault, DefaultHasher, Hasher};
15use std::iter::FusedIterator;
16use std::{borrow::Cow, ops::Range};
17
18/// A struct representing a key/value XML attribute.
19///
20/// Field `value` stores raw bytes, possibly containing escape-sequences. Most users will likely
21/// want to access the value using one of the [`normalized_value`] and [`decoded_and_normalized_value`]
22/// functions.
23///
24/// [`normalized_value`]: Self::normalized_value
25/// [`decoded_and_normalized_value`]: Self::decoded_and_normalized_value
26#[derive(Clone, Eq, PartialEq)]
27pub struct Attribute<'a> {
28 /// The key to uniquely define the attribute.
29 ///
30 /// If [`Attributes::with_checks`] is turned off, the key might not be unique.
31 pub key: QName<'a>,
32 /// The raw value of the attribute.
33 pub value: Cow<'a, [u8]>,
34}
35
36impl<'a> Attribute<'a> {
37 /// Returns the attribute value normalized as per [the XML specification] (or [for 1.0]).
38 ///
39 /// The document **must** be UTF-8 encoded, or pre-processed using [`DecodingReader`].
40 ///
41 /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
42 ///
43 /// The following escape sequences are replaced with their unescaped equivalents:
44 ///
/// | Escape Sequence | Replacement
/// |-----------------|------------
/// | `&lt;`          | `<`
/// | `&gt;`          | `>`
/// | `&amp;`         | `&`
/// | `&apos;`        | `'`
/// | `&quot;`        | `"`
52 ///
53 /// This will allocate unless the raw attribute value does not require normalization.
54 ///
/// Note, although you may use this library to parse HTML, you cannot use this
/// method to get HTML content, because it returns a normalized value: the following
/// sequences are translated into a single space (U+0020) character:
///
/// - `\r\n`
/// - `\r\x85` (only XML 1.1)
/// - `\r`
/// - `\n`
/// - `\t`
/// - `\x85` (only XML 1.1)
/// - `\u2028` (only XML 1.1)
66 ///
67 /// The text in HTML normally is not normalized in any way; normalization is
68 /// performed only in limited contexts and [only for] `\r\n` and `\r`.
69 ///
70 /// See also [`normalized_value_with()`](Self::normalized_value_with).
71 ///
72 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
73 ///
74 /// NOTE: If you are using this in a context where the input is not controlled,
75 /// it is preferred to wrap the input stream in [`DecodingReader`] or to use
76 /// [`decoded_and_normalized_value()`](Self::decoded_and_normalized_value) instead.
77 ///
78 /// </div>
79 ///
80 /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
81 /// [`DecodingReader`]: ../../encoding/struct.DecodingReader.html
82 /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
83 /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
84 pub fn normalized_value(&self, version: XmlVersion) -> XmlResult<Cow<'a, str>> {
85 // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
86 self.normalized_value_with(version, 1, resolve_predefined_entity)
87 }
88
89 /// Returns the attribute value normalized as per [the XML specification] (or [for 1.0]),
90 /// using a custom entity resolver.
91 ///
92 /// The document **must** be UTF-8 encoded, or pre-processed using [`DecodingReader`].
93 ///
94 /// Do not use this method with HTML attributes.
95 ///
96 /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
97 ///
98 /// A function for resolving entities can be provided as `resolve_entity`.
99 /// This method does not resolve any predefined entities, but you can use
100 /// [`resolve_predefined_entity`] in your function.
101 ///
102 /// This will allocate unless the raw attribute value does not require normalization.
103 ///
/// Note, although you may use this library to parse HTML, you cannot use this
/// method to get HTML content, because it returns a normalized value: the following
/// sequences are translated into a single space (U+0020) character:
///
/// - `\r\n`
/// - `\r\x85` (only XML 1.1)
/// - `\r`
/// - `\n`
/// - `\t`
/// - `\x85` (only XML 1.1)
/// - `\u2028` (only XML 1.1)
115 ///
116 /// The text in HTML normally is not normalized in any way; normalization is
117 /// performed only in limited contexts and [only for] `\r\n` and `\r`.
118 ///
119 /// See also [`normalized_value()`](Self::normalized_value).
120 ///
121 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
122 ///
123 /// NOTE: If you are using this in a context where the input is not controlled,
124 /// it is preferred to wrap the input stream in [`DecodingReader`] or to use
125 /// [`decoded_and_normalized_value_with()`](Self::decoded_and_normalized_value_with) instead.
126 ///
127 /// </div>
128 ///
129 /// # Parameters
130 ///
/// - `depth`: maximum number of nested entities that can be expanded. If the expansion
///   chain is longer than this value, the function returns [`EscapeError::TooManyNestedEntities`]
133 /// - `resolve_entity`: a function to resolve entity. This function could be called
134 /// multiple times on the same input and can return different values in each case
135 /// for the same input, although it is not recommended
136 ///
137 /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
138 /// [`DecodingReader`]: ../../encoding/struct.DecodingReader.html
139 /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
140 /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
141 /// [`EscapeError::TooManyNestedEntities`]: crate::escape::EscapeError::TooManyNestedEntities
142 pub fn normalized_value_with<'entity>(
143 &self,
144 version: XmlVersion,
145 depth: usize,
146 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
147 ) -> XmlResult<Cow<'a, str>> {
148 use crate::encoding::EncodingError;
149 use std::str::from_utf8;
150
151 let decoded = match &self.value {
152 Cow::Borrowed(bytes) => Cow::Borrowed(from_utf8(bytes).map_err(EncodingError::Utf8)?),
153 // Convert to owned, because otherwise Cow will be bound with wrong lifetime
154 Cow::Owned(bytes) => {
155 Cow::Owned(from_utf8(bytes).map_err(EncodingError::Utf8)?.to_owned())
156 }
157 };
158
159 match version.normalize_attribute_value(&decoded, depth, resolve_entity)? {
160 // Because result is borrowed, no replacements was done and we can use original string
161 Cow::Borrowed(_) => Ok(decoded),
162 Cow::Owned(s) => Ok(s.into()),
163 }
164 }
165
166 /// Decodes using a provided reader and returns the attribute value normalized
167 /// as per [the XML specification] (or [for 1.0]).
168 ///
169 /// Do not use this method with HTML attributes.
170 ///
171 /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
172 ///
173 /// The following escape sequences are replaced with their unescaped equivalents:
174 ///
/// | Escape Sequence | Replacement
/// |-----------------|------------
/// | `&lt;`          | `<`
/// | `&gt;`          | `>`
/// | `&amp;`         | `&`
/// | `&apos;`        | `'`
/// | `&quot;`        | `"`
182 ///
183 /// This will allocate unless the raw attribute value does not require normalization.
184 ///
/// Note, although you may use this library to parse HTML, you cannot use this
/// method to get HTML content, because it returns a normalized value: the following
/// sequences are translated into a single space (U+0020) character:
///
/// - `\r\n`
/// - `\r\x85` (only XML 1.1)
/// - `\r`
/// - `\n`
/// - `\t`
/// - `\x85` (only XML 1.1)
/// - `\u2028` (only XML 1.1)
196 ///
197 /// The text in HTML normally is not normalized in any way; normalization is
198 /// performed only in limited contexts and [only for] `\r\n` and `\r`.
199 ///
200 /// See also [`decoded_and_normalized_value_with()`](#method.decoded_and_normalized_value_with)
201 ///
202 /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
203 /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
204 /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
205 pub fn decoded_and_normalized_value(
206 &self,
207 version: XmlVersion,
208 decoder: Decoder,
209 ) -> XmlResult<Cow<'a, str>> {
210 // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
211 self.decoded_and_normalized_value_with(version, decoder, 1, resolve_predefined_entity)
212 }
213
214 /// Decodes using a provided reader and returns the attribute value normalized
215 /// as per [the XML specification] (or [for 1.0]), using a custom entity resolver.
216 ///
217 /// Do not use this method with HTML attributes.
218 ///
219 /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
220 ///
221 /// A function for resolving entities can be provided as `resolve_entity`.
222 /// This method does not resolve any predefined entities, but you can use
223 /// [`resolve_predefined_entity`] in your function.
224 ///
225 /// This will allocate unless the raw attribute value does not require normalization.
226 ///
/// Note, although you may use this library to parse HTML, you cannot use this
/// method to get HTML content, because it returns a normalized value: the following
/// sequences are translated into a single space (U+0020) character:
///
/// - `\r\n`
/// - `\r\x85` (only XML 1.1)
/// - `\r`
/// - `\n`
/// - `\t`
/// - `\x85` (only XML 1.1)
/// - `\u2028` (only XML 1.1)
238 ///
239 /// The text in HTML normally is not normalized in any way; normalization is
240 /// performed only in limited contexts and [only for] `\r\n` and `\r`.
241 ///
242 /// See also [`decoded_and_normalized_value()`](#method.decoded_and_normalized_value)
243 ///
244 /// # Parameters
245 ///
/// - `depth`: maximum number of nested entities that can be expanded. If the expansion
///   chain is longer than this value, the function returns [`EscapeError::TooManyNestedEntities`]
248 /// - `resolve_entity`: a function to resolve entity. This function could be called
249 /// multiple times on the same input and can return different values in each case
250 /// for the same input, although it is not recommended
251 ///
252 /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
253 /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
254 /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
255 /// [`EscapeError::TooManyNestedEntities`]: crate::escape::EscapeError::TooManyNestedEntities
256 pub fn decoded_and_normalized_value_with<'entity>(
257 &self,
258 version: XmlVersion,
259 decoder: Decoder,
260 depth: usize,
261 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
262 ) -> XmlResult<Cow<'a, str>> {
263 let decoded = match &self.value {
264 Cow::Borrowed(bytes) => decoder.decode(bytes)?,
265 // Convert to owned, because otherwise Cow will be bound with wrong lifetime
266 Cow::Owned(bytes) => decoder.decode(bytes)?.into_owned().into(),
267 };
268
269 match version.normalize_attribute_value(&decoded, depth, resolve_entity)? {
270 // Because result is borrowed, no replacements was done and we can use original string
271 Cow::Borrowed(_) => Ok(decoded),
272 Cow::Owned(s) => Ok(s.into()),
273 }
274 }
275
276 /// Returns the unescaped value.
277 ///
/// This is normally the value you are interested in. Escape sequences such as `&gt;` are
/// replaced with their unescaped equivalents such as `>`.
280 ///
281 /// This will allocate if the value contains any escape sequences.
282 ///
283 /// See also [`unescape_value_with()`](Self::unescape_value_with)
284 ///
285 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
286 ///
287 /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
288 /// should only be used by applications.
289 /// Libs should use [`decoded_and_normalized_value()`](Self::decoded_and_normalized_value)
290 /// instead, because if lib will be used in a project which depends on quick_xml with
291 /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
292 ///
293 /// </div>
294 ///
295 /// [`encoding`]: ../../index.html#encoding
296 /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
297 #[cfg(any(doc, not(feature = "encoding")))]
298 #[deprecated = "use `Self::normalized_value()`"]
299 pub fn unescape_value(&self) -> XmlResult<Cow<'a, str>> {
300 // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
301 self.normalized_value_with(XmlVersion::Implicit1_0, 1, resolve_predefined_entity)
302 }
303
304 /// Decodes using UTF-8 then unescapes the value, using custom entities.
305 ///
/// This is normally the value you are interested in. Escape sequences such as `&gt;` are
/// replaced with their unescaped equivalents such as `>`.
308 /// A fallback resolver for additional custom entities can be provided via
309 /// `resolve_entity`.
310 ///
311 /// This will allocate if the value contains any escape sequences.
312 ///
313 /// See also [`unescape_value()`](Self::unescape_value)
314 ///
315 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
316 ///
317 /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
318 /// should only be used by applications.
319 /// Libs should use [`decoded_and_normalized_value_with()`](Self::decoded_and_normalized_value_with)
320 /// instead, because if lib will be used in a project which depends on quick_xml with
321 /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
322 ///
323 /// </div>
324 ///
325 /// [`encoding`]: ../../index.html#encoding
326 /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
327 #[cfg(any(doc, not(feature = "encoding")))]
328 #[deprecated = "use `Self::normalized_value_with()`"]
329 #[inline]
330 pub fn unescape_value_with<'entity>(
331 &self,
332 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
333 ) -> XmlResult<Cow<'a, str>> {
334 self.normalized_value_with(XmlVersion::Implicit1_0, 128, resolve_entity)
335 }
336
337 /// Decodes then unescapes the value.
338 ///
339 /// This will allocate if the value contains any escape sequences or in
340 /// non-UTF-8 encoding.
341 #[deprecated = "use `Self::decoded_and_normalized_value()`"]
342 pub fn decode_and_unescape_value(&self, decoder: Decoder) -> XmlResult<Cow<'a, str>> {
343 // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
344 self.decoded_and_normalized_value_with(
345 XmlVersion::Implicit1_0,
346 decoder,
347 1,
348 resolve_predefined_entity,
349 )
350 }
351
352 /// Decodes then unescapes the value with custom entities.
353 ///
354 /// This will allocate if the value contains any escape sequences or in
355 /// non-UTF-8 encoding.
356 #[deprecated = "use `Self::decoded_and_normalized_value_with()`"]
357 pub fn decode_and_unescape_value_with<'entity>(
358 &self,
359 decoder: Decoder,
360 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
361 ) -> XmlResult<Cow<'a, str>> {
362 self.decoded_and_normalized_value_with(
363 XmlVersion::Implicit1_0,
364 decoder,
365 128,
366 resolve_entity,
367 )
368 }
369
370 /// If attribute value [represents] valid boolean values, returns `Some`, otherwise returns `None`.
371 ///
372 /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`.
373 ///
374 /// # Examples
375 ///
376 /// ```
377 /// # use pretty_assertions::assert_eq;
378 /// use quick_xml::events::attributes::Attribute;
379 ///
380 /// let attr = Attribute::from(("attr", "false"));
381 /// assert_eq!(attr.as_bool(), Some(false));
382 ///
383 /// let attr = Attribute::from(("attr", "0"));
384 /// assert_eq!(attr.as_bool(), Some(false));
385 ///
386 /// let attr = Attribute::from(("attr", "true"));
387 /// assert_eq!(attr.as_bool(), Some(true));
388 ///
389 /// let attr = Attribute::from(("attr", "1"));
390 /// assert_eq!(attr.as_bool(), Some(true));
391 ///
392 /// let attr = Attribute::from(("attr", "not bool"));
393 /// assert_eq!(attr.as_bool(), None);
394 /// ```
395 ///
396 /// [represents]: https://www.w3.org/TR/xmlschema11-2/#boolean
397 #[inline]
398 pub fn as_bool(&self) -> Option<bool> {
399 match self.value.as_ref() {
400 b"1" | b"true" => Some(true),
401 b"0" | b"false" => Some(false),
402 _ => None,
403 }
404 }
405}
406
407impl<'a> Debug for Attribute<'a> {
408 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
409 f.debug_struct("Attribute")
410 .field("key", &Bytes(self.key.as_ref()))
411 .field("value", &Bytes(&self.value))
412 .finish()
413 }
414}
415
416impl<'a> From<(&'a [u8], &'a [u8])> for Attribute<'a> {
417 /// Creates new attribute from raw bytes.
418 /// Does not apply any transformation to both key and value.
419 ///
420 /// # Examples
421 ///
422 /// ```
423 /// # use pretty_assertions::assert_eq;
424 /// use quick_xml::events::attributes::Attribute;
425 ///
426 /// let features = Attribute::from(("features".as_bytes(), "Bells & whistles".as_bytes()));
427 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
428 /// ```
429 fn from(val: (&'a [u8], &'a [u8])) -> Attribute<'a> {
430 Attribute {
431 key: QName(val.0),
432 value: Cow::from(val.1),
433 }
434 }
435}
436
437impl<'a> From<(&'a str, &'a str)> for Attribute<'a> {
438 /// Creates new attribute from text representation.
439 /// Key is stored as-is, but the value will be escaped.
440 ///
441 /// # Examples
442 ///
443 /// ```
444 /// # use pretty_assertions::assert_eq;
445 /// use quick_xml::events::attributes::Attribute;
446 ///
447 /// let features = Attribute::from(("features", "Bells & whistles"));
448 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
449 /// ```
450 fn from(val: (&'a str, &'a str)) -> Attribute<'a> {
451 Attribute {
452 key: QName(val.0.as_bytes()),
453 value: match escape(val.1) {
454 Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
455 Cow::Owned(s) => Cow::Owned(s.into_bytes()),
456 },
457 }
458 }
459}
460
461impl<'a> From<(&'a str, Cow<'a, str>)> for Attribute<'a> {
462 /// Creates new attribute from text representation.
463 /// Key is stored as-is, but the value will be escaped.
464 ///
465 /// # Examples
466 ///
467 /// ```
468 /// # use std::borrow::Cow;
469 /// use pretty_assertions::assert_eq;
470 /// use quick_xml::events::attributes::Attribute;
471 ///
472 /// let features = Attribute::from(("features", Cow::Borrowed("Bells & whistles")));
473 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
474 /// ```
475 fn from(val: (&'a str, Cow<'a, str>)) -> Attribute<'a> {
476 Attribute {
477 key: QName(val.0.as_bytes()),
478 value: match escape(val.1) {
479 Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
480 Cow::Owned(s) => Cow::Owned(s.into_bytes()),
481 },
482 }
483 }
484}
485
486impl<'a> From<Attr<&'a [u8]>> for Attribute<'a> {
487 #[inline]
488 fn from(attr: Attr<&'a [u8]>) -> Self {
489 Self {
490 key: attr.key(),
491 value: Cow::Borrowed(attr.value()),
492 }
493 }
494}
495
496////////////////////////////////////////////////////////////////////////////////////////////////////
497
498/// Iterator over XML attributes.
499///
500/// Yields `Result<Attribute>`. An `Err` will be yielded if an attribute is malformed or duplicated.
501/// The duplicate check can be turned off by calling [`with_checks(false)`].
502///
503/// When [`serialize`] feature is enabled, can be converted to serde's deserializer.
504///
505/// [`with_checks(false)`]: Self::with_checks
506/// [`serialize`]: ../../index.html#serialize
507#[derive(Clone)]
508pub struct Attributes<'a> {
509 /// Slice of `BytesStart` corresponding to attributes
510 bytes: &'a [u8],
511 /// Iterator state, independent from the actual source of bytes
512 state: IterState,
513 /// Encoding used for `bytes`
514 decoder: Decoder,
515}
516
517impl<'a> Attributes<'a> {
518 /// Internal constructor, used by `BytesStart`. Supplies data in reader's encoding
519 #[inline]
520 pub(crate) const fn wrap(buf: &'a [u8], pos: usize, html: bool, decoder: Decoder) -> Self {
521 Self {
522 bytes: buf,
523 state: IterState::new(pos, html),
524 decoder,
525 }
526 }
527
/// Creates a new attribute iterator from a buffer, which recognizes only XML-style
/// attributes, i.e. those in the form `name = "value"` or `name = 'value'`.
/// HTML-style attributes (i.e. without quotes or with only a name) will return an error.
531 ///
532 /// # Parameters
533 /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
534 /// string between `<` and `>` (or `/>`) of a tag;
535 /// - `pos`: a position in the `buf` where tag name is finished and attributes
536 /// is started. It is not necessary to point exactly to the end of a tag name,
///   although it usually does. If it is greater than the `buf` length,
///   then the iterator will return `None` immediately.
539 ///
540 /// # Example
541 /// ```
542 /// # use quick_xml::events::attributes::{Attribute, Attributes};
543 /// # use pretty_assertions::assert_eq;
544 /// #
545 /// let mut iter = Attributes::new("tag-name attr1 = 'value1' attr2='value2' ", 9);
/// //                              ^0       ^9
547 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
548 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "value2")))));
549 /// assert_eq!(iter.next(), None);
550 /// ```
551 pub const fn new(buf: &'a str, pos: usize) -> Self {
552 Self::wrap(buf.as_bytes(), pos, false, Decoder::utf8())
553 }
554
555 /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax.
556 ///
557 /// # Parameters
558 /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
559 /// string between `<` and `>` (or `/>`) of a tag;
560 /// - `pos`: a position in the `buf` where tag name is finished and attributes
561 /// is started. It is not necessary to point exactly to the end of a tag name,
///   although it usually does. If it is greater than the `buf` length,
///   then the iterator will return `None` immediately.
564 ///
565 /// # Example
566 /// ```
567 /// # use quick_xml::events::attributes::{Attribute, Attributes};
568 /// # use pretty_assertions::assert_eq;
569 /// #
570 /// let mut iter = Attributes::html("tag-name attr1 = value1 attr2 ", 9);
/// //                               ^0       ^9
572 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
573 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "")))));
574 /// assert_eq!(iter.next(), None);
575 /// ```
576 pub const fn html(buf: &'a str, pos: usize) -> Self {
577 Self::wrap(buf.as_bytes(), pos, true, Decoder::utf8())
578 }
579
580 /// Changes whether attributes should be checked for uniqueness.
581 ///
582 /// The XML specification requires attribute keys in the same element to be unique. This check
583 /// can be disabled to improve performance slightly.
584 ///
585 /// (`true` by default)
586 pub fn with_checks(&mut self, val: bool) -> &mut Attributes<'a> {
587 self.state.check_duplicates = val;
588 self
589 }
590
591 /// Checks if the current tag has a [`xsi:nil`] attribute. This method ignores any errors in
592 /// attributes.
593 ///
594 /// # Examples
595 ///
596 /// ```
597 /// # use pretty_assertions::assert_eq;
598 /// use quick_xml::events::Event;
599 /// use quick_xml::name::QName;
600 /// use quick_xml::reader::NsReader;
601 ///
602 /// let mut reader = NsReader::from_str("
603 /// <root xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>
604 /// <true xsi:nil='true'/>
605 /// <false xsi:nil='false'/>
606 /// <none/>
607 /// <non-xsi xsi:nil='true' xmlns:xsi='namespace'/>
608 /// <unbound-nil nil='true' xmlns='http://www.w3.org/2001/XMLSchema-instance'/>
609 /// <another-xmlns f:nil='true' xmlns:f='http://www.w3.org/2001/XMLSchema-instance'/>
610 /// </root>
611 /// ");
612 /// reader.config_mut().trim_text(true);
613 ///
614 /// macro_rules! check {
615 /// ($reader:expr, $name:literal, $value:literal) => {
616 /// let event = match $reader.read_event().unwrap() {
617 /// Event::Empty(e) => e,
618 /// e => panic!("Unexpected event {:?}", e),
619 /// };
620 /// assert_eq!(
621 /// (event.name(), event.attributes().has_nil($reader.resolver())),
622 /// (QName($name.as_bytes()), $value),
623 /// );
624 /// };
625 /// }
626 ///
627 /// let root = match reader.read_event().unwrap() {
628 /// Event::Start(e) => e,
629 /// e => panic!("Unexpected event {:?}", e),
630 /// };
631 /// assert_eq!(root.attributes().has_nil(reader.resolver()), false);
632 ///
633 /// // definitely true
634 /// check!(reader, "true", true);
635 /// // definitely false
636 /// check!(reader, "false", false);
637 /// // absence of the attribute means that attribute is not set
638 /// check!(reader, "none", false);
639 /// // attribute not bound to the correct namespace
640 /// check!(reader, "non-xsi", false);
641 /// // attributes without prefix not bound to any namespace
642 /// check!(reader, "unbound-nil", false);
643 /// // prefix can be any while it is bound to the correct namespace
644 /// check!(reader, "another-xmlns", true);
645 /// ```
646 ///
647 /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil
648 pub fn has_nil(&mut self, resolver: &NamespaceResolver) -> bool {
649 use crate::name::ResolveResult::*;
650
651 self.any(|attr| {
652 if let Ok(attr) = attr {
653 match resolver.resolve_attribute(attr.key) {
654 (
655 Bound(Namespace(b"http://www.w3.org/2001/XMLSchema-instance")),
656 LocalName(b"nil"),
657 ) => attr.as_bool().unwrap_or_default(),
658 _ => false,
659 }
660 } else {
661 false
662 }
663 })
664 }
665
666 /// Get the decoder, used to decode bytes, read by the reader which produces
667 /// this iterator, to the strings.
668 ///
/// When the iterator was created manually or obtained from a manually created
/// [`BytesStart`], the encoding is UTF-8.
///
/// If [`encoding`] feature is enabled and no encoding is specified in declaration,
/// defaults to UTF-8.
///
/// [`BytesStart`]: crate::events::BytesStart
/// [`encoding`]: ../../index.html#encoding
677 #[inline]
678 pub const fn decoder(&self) -> Decoder {
679 self.decoder
680 }
681}
682
683impl<'a> Debug for Attributes<'a> {
684 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
685 f.debug_struct("Attributes")
686 .field("bytes", &Bytes(self.bytes))
687 .field("state", &self.state)
688 .field("decoder", &self.decoder)
689 .finish()
690 }
691}
692
693impl<'a> Iterator for Attributes<'a> {
694 type Item = Result<Attribute<'a>, AttrError>;
695
696 #[inline]
697 fn next(&mut self) -> Option<Self::Item> {
698 match self.state.next(self.bytes) {
699 None => None,
700 Some(Ok(a)) => Some(Ok(a.map(|range| &self.bytes[range]).into())),
701 Some(Err(e)) => Some(Err(e)),
702 }
703 }
704}
705
706impl<'a> FusedIterator for Attributes<'a> {}
707
708////////////////////////////////////////////////////////////////////////////////////////////////////
709
710/// Errors that can be raised during parsing attributes.
711///
712/// Recovery position in examples shows the position from which parsing of the
713/// next attribute will be attempted.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AttrError {
    /// Attribute key was not followed by `=`, position relative to the start of
    /// the owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key another="attribute"/>
    /// <!--     ^~~ error position, recovery position (8) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    ExpectedEq(usize),
    /// Attribute value was not found after `=`, position relative to the start
    /// of the owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = />
    /// <!--       ^~~ error position, recovery position (10) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because otherwise any content after `=` will be treated as a value.
    /// The XML
    ///
    /// ```xml
    /// <tag key = another-key = "value"/>
    /// <!--                   ^ ^- recovery position (24) -->
    /// <!--                   '~~ error position (22) -->
    /// ```
    ///
    /// will be treated as `Attribute { key = b"key", value = b"another-key" }`
    /// and either an [`Attribute`] is returned or [`AttrError::UnquotedValue`]
    /// is raised, depending on the parsing mode.
    ExpectedValue(usize),
    /// Attribute value is not quoted, position relative to the start of the
    /// owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = value />
    /// <!--       ^    ^~~ recovery position (15) -->
    /// <!--       '~~ error position (10) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    UnquotedValue(usize),
    /// Attribute value was not finished with a matching quote, position relative
    /// to the start of owning tag and a quote is provided. That position is always
    /// a last character in the tag content.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = "value  />
    /// <tag key = 'value  />
    /// <!--               ^~~ error position, recovery position (18) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because all input was consumed during scanning for a quote.
    ExpectedQuote(usize, u8),
    /// An attribute with the same name was already encountered. Two parameters
    /// define (1) the error position relative to the start of the owning tag
    /// for a new attribute and (2) the start position of a previously encountered
    /// attribute with the same name.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = 'value'  key="value2" attr3='value3' />
    /// <!-- ^              ^            ^~~ recovery position (32) -->
    /// <!-- |              '~~ error position (19) -->
    /// <!-- '~~ previous position (4) -->
    /// ```
    ///
    /// This error is returned only when [`Attributes::with_checks()`] is set
    /// to `true` (that is default behavior).
    Duplicated(usize, usize),
}
798
799impl Display for AttrError {
800 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
801 match self {
802 Self::ExpectedEq(pos) => write!(
803 f,
804 r#"position {}: attribute key must be directly followed by `=` or space"#,
805 pos
806 ),
807 Self::ExpectedValue(pos) => write!(
808 f,
809 r#"position {}: `=` must be followed by an attribute value"#,
810 pos
811 ),
812 Self::UnquotedValue(pos) => write!(
813 f,
814 r#"position {}: attribute value must be enclosed in `"` or `'`"#,
815 pos
816 ),
817 Self::ExpectedQuote(pos, quote) => write!(
818 f,
819 r#"position {}: missing closing quote `{}` in attribute value"#,
820 pos, *quote as char
821 ),
822 Self::Duplicated(pos1, pos2) => write!(
823 f,
824 r#"position {}: duplicated attribute, previous declaration at position {}"#,
825 pos1, pos2
826 ),
827 }
828 }
829}
830
831impl std::error::Error for AttrError {}
832
833////////////////////////////////////////////////////////////////////////////////////////////////////
834
835/// A struct representing a key/value XML or HTML [attribute].
836///
837/// [attribute]: https://www.w3.org/TR/xml11/#NT-Attribute
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Attr<T> {
    /// `key="value"`: the value is enclosed in double quotes. Holds the key
    /// and the value. This is the canonical XML-style attribute.
    DoubleQ(T, T),
    /// `key='value'`: the value is enclosed in single quotes. Holds the key
    /// and the value. This is an XML-style attribute.
    SingleQ(T, T),
    /// `key=value`: the value is not quoted. Holds the key and the value.
    /// This HTML-style attribute is produced in HTML-mode parsing only; in XML
    /// mode [`AttrError::UnquotedValue`] is raised instead.
    ///
    /// The value is not checked against the [HTML specification], so it may be
    /// invalid: in particular, it can contain `"`, `'`, `=`, `<`, and
    /// <code>`</code> characters. A `>` character can never be present, because
    /// the parser splits the input into [events] on it before attribute parsing
    /// starts.
    ///
    /// [HTML specification]: https://html.spec.whatwg.org/#unquoted
    /// [events]: crate::events::Event::Start
    Unquoted(T, T),
    /// `key`: an attribute without a value. Holds the key only. This HTML-style
    /// attribute is produced in HTML-mode parsing only; in XML mode
    /// [`AttrError::ExpectedEq`] is raised instead.
    Empty(T),
}
865
866impl<T> Attr<T> {
867 /// Maps an `Attr<T>` to `Attr<U>` by applying a function to a contained key and value.
868 #[inline]
869 pub fn map<U, F>(self, mut f: F) -> Attr<U>
870 where
871 F: FnMut(T) -> U,
872 {
873 match self {
874 Attr::DoubleQ(key, value) => Attr::DoubleQ(f(key), f(value)),
875 Attr::SingleQ(key, value) => Attr::SingleQ(f(key), f(value)),
876 Attr::Empty(key) => Attr::Empty(f(key)),
877 Attr::Unquoted(key, value) => Attr::Unquoted(f(key), f(value)),
878 }
879 }
880}
881
882impl<'a> Attr<&'a [u8]> {
883 /// Returns the key value
884 #[inline]
885 pub const fn key(&self) -> QName<'a> {
886 QName(match self {
887 Attr::DoubleQ(key, _) => key,
888 Attr::SingleQ(key, _) => key,
889 Attr::Empty(key) => key,
890 Attr::Unquoted(key, _) => key,
891 })
892 }
893 /// Returns the attribute value. For [`Self::Empty`] variant an empty slice
894 /// is returned according to the [HTML specification].
895 ///
896 /// [HTML specification]: https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#syntax-attr-empty
897 #[inline]
898 pub const fn value(&self) -> &'a [u8] {
899 match self {
900 Attr::DoubleQ(_, value) => value,
901 Attr::SingleQ(_, value) => value,
902 Attr::Empty(_) => &[],
903 Attr::Unquoted(_, value) => value,
904 }
905 }
906}
907
908impl<T: AsRef<[u8]>> Debug for Attr<T> {
909 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
910 match self {
911 Attr::DoubleQ(key, value) => f
912 .debug_tuple("Attr::DoubleQ")
913 .field(&Bytes(key.as_ref()))
914 .field(&Bytes(value.as_ref()))
915 .finish(),
916 Attr::SingleQ(key, value) => f
917 .debug_tuple("Attr::SingleQ")
918 .field(&Bytes(key.as_ref()))
919 .field(&Bytes(value.as_ref()))
920 .finish(),
921 Attr::Empty(key) => f
922 .debug_tuple("Attr::Empty")
923 // Comment to prevent formatting and keep style consistent
924 .field(&Bytes(key.as_ref()))
925 .finish(),
926 Attr::Unquoted(key, value) => f
927 .debug_tuple("Attr::Unquoted")
928 .field(&Bytes(key.as_ref()))
929 .field(&Bytes(value.as_ref()))
930 .finish(),
931 }
932 }
933}
934
935/// Unpacks attribute key and value into tuple of this two elements.
936/// `None` value element is returned only for [`Attr::Empty`] variant.
937impl<T> From<Attr<T>> for (T, Option<T>) {
938 #[inline]
939 fn from(attr: Attr<T>) -> Self {
940 match attr {
941 Attr::DoubleQ(key, value) => (key, Some(value)),
942 Attr::SingleQ(key, value) => (key, Some(value)),
943 Attr::Empty(key) => (key, None),
944 Attr::Unquoted(key, value) => (key, Some(value)),
945 }
946 }
947}
948
949////////////////////////////////////////////////////////////////////////////////////////////////////
950
/// Outcome of parsing a single attribute: the index ranges of its key and
/// value, or the error that prevented parsing it.
type AttrResult = Result<Attr<Range<usize>>, AttrError>;
952
/// What [`IterState::next`] has to do before it can parse the next attribute
#[derive(Clone, Copy, Debug)]
enum State {
    /// Iteration is over; every further [`IterState::next`] request is
    /// answered with `None`.
    Done,
    /// The previous attribute was parsed successfully. Holds the offset from
    /// which the search for the next attribute starts.
    Next(usize),
    /// The previous attribute produced [`AttrError::UnquotedValue`]. Holds
    /// the offset of the beginning of the value; recovery has to skip the
    /// value.
    SkipValue(usize),
    /// The previous attribute produced [`AttrError::Duplicated`]. Holds the
    /// offset of the equal (`=`) sign; recovery has to skip the sign and the
    /// value.
    SkipEqValue(usize),
}
968
/// Largest number of attributes in one start tag for which duplicate names
/// are detected by a plain linear scan of the names seen so far. Beyond it the
/// check switches to a hash pre-filter (see
/// [`IterState::check_for_duplicates`]).
///
/// Typical start tags have just a few attributes -- the busiest element of our
/// benchmark corpus (`tests/documents/players.xml`) has 22 -- and for them the
/// scan is faster than hashing and does not allocate. For larger tags the scan
/// is the O(N²) CPU-DoS of [#969], so past this count a hash set is paid for
/// to keep the whole tag O(N). The value is chosen just above the measured
/// linear-vs-hash crossover.
///
/// [#969]: https://github.com/tafia/quick-xml/issues/969
const SMALL_ATTRIBUTE_COUNT: usize = 32;
982
/// A pass-through [`Hasher`] for the `key_hashes` set. The set stores 64-bit
/// hashes of attribute names, so hashing them once more with the default
/// SipHash would be wasted work. Only `write_u64` is ever exercised (via the
/// `Hash` impl of `u64`).
#[derive(Default)]
struct IdentityHasher(u64);
988
989impl Hasher for IdentityHasher {
990 #[inline]
991 fn finish(&self) -> u64 {
992 self.0
993 }
994
995 #[inline]
996 fn write(&mut self, _: &[u8]) {
997 // The set only ever stores `u64` keys, which route through `write_u64`.
998 unreachable!("IdentityHasher only supports u64 keys")
999 }
1000
1001 #[inline]
1002 fn write_u64(&mut self, n: u64) {
1003 self.0 = n;
1004 }
1005}
1006
/// Hashes a single attribute name into 64 bits.
///
/// The hash is keyed with a [`RandomState`] that is created once per process.
/// A hasher made with `DefaultHasher::new()` always uses the same fixed keys,
/// so its output is predictable; because the resulting values are stored in a
/// set that uses them as-is (see `IdentityHasher`), predictable hashes would
/// let untrusted input pick names that pile up in the same buckets. With a
/// random per-process key the values cannot be precomputed.
///
/// Within one process equal names always produce equal hashes, which is all
/// the duplicate check relies on. The values differ between processes and
/// must not be persisted.
///
/// [`RandomState`]: std::collections::hash_map::RandomState
#[inline]
fn hash_name(name: &[u8]) -> u64 {
    use std::collections::hash_map::RandomState;
    use std::hash::BuildHasher;
    use std::sync::OnceLock;

    // Initialized on first use and then shared by all threads
    static KEYS: OnceLock<RandomState> = OnceLock::new();

    KEYS.get_or_init(RandomState::new).hash_one(name)
}
1015
1016/// External iterator over spans of attribute key and value
1017#[derive(Clone, Debug)]
1018pub(crate) struct IterState {
1019 /// Iteration state that determines what actions should be done before the
1020 /// actual parsing of the next attribute
1021 state: State,
1022 /// If `true`, enables ability to parse unquoted values and key-only (empty)
1023 /// attributes
1024 html: bool,
1025 /// If `true`, checks for duplicate names
1026 check_duplicates: bool,
1027 /// If `check_duplicates` is set, contains the ranges of already parsed attribute
1028 /// names. We store a ranges instead of slices to able to report a previous
1029 /// attribute position
1030 keys: Vec<Range<usize>>,
1031 /// 64-bit hashes of the byte content of `keys`, used as an O(1) pre-filter
1032 /// once a start tag declares more than `SMALL_ATTRIBUTE_COUNT` attributes, so
1033 /// the duplicate check stays O(N) over the whole tag instead of O(N²). The
1034 /// values are already hashes, so the set stores them with `IdentityHasher`
1035 /// instead of re-hashing. Allocated only when the threshold is crossed, so
1036 /// small tags (and [`IterState::new`]) stay allocation-free and `const`.
1037 key_hashes: Option<HashSet<u64, BuildHasherDefault<IdentityHasher>>>,
1038}
1039
1040impl IterState {
1041 pub const fn new(offset: usize, html: bool) -> Self {
1042 Self {
1043 state: State::Next(offset),
1044 html,
1045 check_duplicates: true,
1046 keys: Vec::new(),
1047 key_hashes: None,
1048 }
1049 }
1050
1051 /// Recover from an error that could have been made on a previous step.
1052 /// Returns an offset from which parsing should continue.
1053 /// If there no input left, returns `None`.
1054 fn recover(&self, slice: &[u8]) -> Option<usize> {
1055 match self.state {
1056 State::Done => None,
1057 State::Next(offset) => Some(offset),
1058 State::SkipValue(offset) => self.skip_value(slice, offset),
1059 State::SkipEqValue(offset) => self.skip_eq_value(slice, offset),
1060 }
1061 }
1062
1063 /// Skip all characters up to first space symbol or end-of-input
1064 #[inline]
1065 #[allow(clippy::manual_map)]
1066 fn skip_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
1067 let mut iter = (offset..).zip(slice[offset..].iter());
1068
1069 match iter.find(|(_, &b)| is_whitespace(b)) {
1070 // Input: ` key = value `
1071 // | ^
1072 // offset e
1073 Some((e, _)) => Some(e),
1074 // Input: ` key = value`
1075 // | ^
1076 // offset e = len()
1077 None => None,
1078 }
1079 }
1080
1081 /// Skip all characters up to first space symbol or end-of-input
1082 #[inline]
1083 fn skip_eq_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
1084 let mut iter = (offset..).zip(slice[offset..].iter());
1085
1086 // Skip all up to the quote and get the quote type
1087 let quote = match iter.find(|(_, &b)| !is_whitespace(b)) {
1088 // Input: ` key = "`
1089 // | ^
1090 // offset
1091 Some((_, b'"')) => b'"',
1092 // Input: ` key = '`
1093 // | ^
1094 // offset
1095 Some((_, b'\'')) => b'\'',
1096
1097 // Input: ` key = x`
1098 // | ^
1099 // offset
1100 Some((offset, _)) => return self.skip_value(slice, offset),
1101 // Input: ` key = `
1102 // | ^
1103 // offset
1104 None => return None,
1105 };
1106
1107 match iter.find(|(_, &b)| b == quote) {
1108 // Input: ` key = " "`
1109 // ^
1110 Some((e, b'"')) => Some(e),
1111 // Input: ` key = ' '`
1112 // ^
1113 Some((e, _)) => Some(e),
1114
1115 // Input: ` key = " `
1116 // Input: ` key = ' `
1117 // ^
1118 // Closing quote not found
1119 None => None,
1120 }
1121 }
1122
1123 /// Checks that the attribute name `key` (a range into `slice`) was not seen
1124 /// earlier in the same start tag, recording it for subsequent checks.
1125 ///
1126 /// Small tags use a direct linear scan of [`Self::keys`]: for a handful of
1127 /// attributes that beats hashing and needs no allocation, which is the
1128 /// overwhelmingly common case. Once a tag declares more than
1129 /// `SMALL_ATTRIBUTE_COUNT` attributes -- where the scan would become the
1130 /// O(N²) CPU-DoS of [#969] -- it switches to a hash pre-filter that keeps the
1131 /// whole tag O(N).
1132 ///
1133 /// [#969]: https://github.com/tafia/quick-xml/issues/969
1134 #[inline]
1135 fn check_for_duplicates(
1136 &mut self,
1137 slice: &[u8],
1138 key: Range<usize>,
1139 ) -> Result<Range<usize>, AttrError> {
1140 if self.check_duplicates {
1141 if self.keys.len() >= SMALL_ATTRIBUTE_COUNT {
1142 return self.check_for_duplicates_hashed(slice, key);
1143 }
1144 if let Some(prev) = self
1145 .keys
1146 .iter()
1147 .find(|r| slice[(*r).clone()] == slice[key.clone()])
1148 {
1149 return Err(AttrError::Duplicated(key.start, prev.start));
1150 }
1151 self.keys.push(key.clone());
1152 }
1153 Ok(key)
1154 }
1155
1156 /// Cold path of [`Self::check_for_duplicates`] for start tags with many
1157 /// attributes: a [`HashSet`] of 64-bit name hashes acts as an O(1) pre-filter
1158 /// so iterating N attributes is O(N) rather than O(N²).
1159 #[cold]
1160 fn check_for_duplicates_hashed(
1161 &mut self,
1162 slice: &[u8],
1163 key: Range<usize>,
1164 ) -> Result<Range<usize>, AttrError> {
1165 let keys = &self.keys;
1166 let key_hashes = self.key_hashes.get_or_insert_with(|| {
1167 // First time over the threshold: seed the set with the names already
1168 // collected during the linear phase so the pre-filter knows them.
1169 let mut set = HashSet::with_capacity_and_hasher(
1170 keys.len() * 2,
1171 BuildHasherDefault::<IdentityHasher>::default(),
1172 );
1173 for r in keys {
1174 set.insert(hash_name(&slice[r.clone()]));
1175 }
1176 set
1177 });
1178 // A fresh hash proves the name is new. On a hit (a real duplicate, or the
1179 // astronomically rare 64-bit collision) fall back to the linear scan to
1180 // recover the exact previous position for `AttrError::Duplicated`.
1181 if !key_hashes.insert(hash_name(&slice[key.clone()])) {
1182 if let Some(prev) = self
1183 .keys
1184 .iter()
1185 .find(|r| slice[(*r).clone()] == slice[key.clone()])
1186 {
1187 return Err(AttrError::Duplicated(key.start, prev.start));
1188 }
1189 }
1190 self.keys.push(key.clone());
1191 Ok(key)
1192 }
1193
1194 /// # Parameters
1195 ///
1196 /// - `slice`: content of the tag, used for checking for duplicates
1197 /// - `key`: Range of key in slice, if iterator in HTML mode
1198 /// - `offset`: Position of error if iterator in XML mode
1199 #[inline]
1200 fn key_only(&mut self, slice: &[u8], key: Range<usize>, offset: usize) -> Option<AttrResult> {
1201 Some(if self.html {
1202 self.check_for_duplicates(slice, key).map(Attr::Empty)
1203 } else {
1204 Err(AttrError::ExpectedEq(offset))
1205 })
1206 }
1207
1208 #[inline]
1209 fn double_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
1210 self.state = State::Next(value.end + 1); // +1 for `"`
1211
1212 Some(Ok(Attr::DoubleQ(key, value)))
1213 }
1214
1215 #[inline]
1216 fn single_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
1217 self.state = State::Next(value.end + 1); // +1 for `'`
1218
1219 Some(Ok(Attr::SingleQ(key, value)))
1220 }
1221
1222 pub fn next(&mut self, slice: &[u8]) -> Option<AttrResult> {
1223 let mut iter = match self.recover(slice) {
1224 Some(offset) => (offset..).zip(slice[offset..].iter()),
1225 None => return None,
1226 };
1227
1228 // Index where next key started
1229 let start_key = match iter.find(|(_, &b)| !is_whitespace(b)) {
1230 // Input: ` key`
1231 // ^
1232 Some((s, _)) => s,
1233 // Input: ` `
1234 // ^
1235 None => {
1236 // Because we reach end-of-input, stop iteration on next call
1237 self.state = State::Done;
1238 return None;
1239 }
1240 };
1241 // Span of a key
1242 let (key, offset) = match iter.find(|(_, &b)| b == b'=' || is_whitespace(b)) {
1243 // Input: ` key=`
1244 // | ^
1245 // s e
1246 Some((e, b'=')) => (start_key..e, e),
1247
1248 // Input: ` key `
1249 // ^
1250 Some((e, _)) => match iter.find(|(_, &b)| !is_whitespace(b)) {
1251 // Input: ` key =`
1252 // | | ^
1253 // start_key e
1254 Some((offset, b'=')) => (start_key..e, offset),
1255 // Input: ` key x`
1256 // | | ^
1257 // start_key e
1258 // If HTML-like attributes is allowed, this is the result, otherwise error
1259 Some((offset, _)) => {
1260 // In any case, recovering is not required
1261 self.state = State::Next(offset);
1262 return self.key_only(slice, start_key..e, offset);
1263 }
1264 // Input: ` key `
1265 // | | ^
1266 // start_key e
1267 // If HTML-like attributes is allowed, this is the result, otherwise error
1268 None => {
1269 // Because we reach end-of-input, stop iteration on next call
1270 self.state = State::Done;
1271 return self.key_only(slice, start_key..e, slice.len());
1272 }
1273 },
1274
1275 // Input: ` key`
1276 // | ^
1277 // s e = len()
1278 // If HTML-like attributes is allowed, this is the result, otherwise error
1279 None => {
1280 // Because we reach end-of-input, stop iteration on next call
1281 self.state = State::Done;
1282 let e = slice.len();
1283 return self.key_only(slice, start_key..e, e);
1284 }
1285 };
1286
1287 let key = match self.check_for_duplicates(slice, key) {
1288 Err(e) => {
1289 self.state = State::SkipEqValue(offset);
1290 return Some(Err(e));
1291 }
1292 Ok(key) => key,
1293 };
1294
1295 ////////////////////////////////////////////////////////////////////////
1296
1297 // Gets the position of quote and quote type
1298 let (start_value, quote) = match iter.find(|(_, &b)| !is_whitespace(b)) {
1299 // Input: ` key = "`
1300 // ^
1301 Some((s, b'"')) => (s + 1, b'"'),
1302 // Input: ` key = '`
1303 // ^
1304 Some((s, b'\'')) => (s + 1, b'\''),
1305
1306 // Input: ` key = x`
1307 // ^
1308 // If HTML-like attributes is allowed, this is the start of the value
1309 Some((s, _)) if self.html => {
1310 // We do not check validity of attribute value characters as required
1311 // according to https://html.spec.whatwg.org/#unquoted. It can be done
1312 // during validation phase
1313 let end = match iter.find(|(_, &b)| is_whitespace(b)) {
1314 // Input: ` key = value `
1315 // | ^
1316 // s e
1317 Some((e, _)) => e,
1318 // Input: ` key = value`
1319 // | ^
1320 // s e = len()
1321 None => slice.len(),
1322 };
1323 self.state = State::Next(end);
1324 return Some(Ok(Attr::Unquoted(key, s..end)));
1325 }
1326 // Input: ` key = x`
1327 // ^
1328 Some((s, _)) => {
1329 self.state = State::SkipValue(s);
1330 return Some(Err(AttrError::UnquotedValue(s)));
1331 }
1332
1333 // Input: ` key = `
1334 // ^
1335 None => {
1336 // Because we reach end-of-input, stop iteration on next call
1337 self.state = State::Done;
1338 return Some(Err(AttrError::ExpectedValue(slice.len())));
1339 }
1340 };
1341
1342 match iter.find(|(_, &b)| b == quote) {
1343 // Input: ` key = " "`
1344 // ^
1345 Some((e, b'"')) => self.double_q(key, start_value..e),
1346 // Input: ` key = ' '`
1347 // ^
1348 Some((e, _)) => self.single_q(key, start_value..e),
1349
1350 // Input: ` key = " `
1351 // Input: ` key = ' `
1352 // ^
1353 // Closing quote not found
1354 None => {
1355 // Because we reach end-of-input, stop iteration on next call
1356 self.state = State::Done;
1357 Some(Err(AttrError::ExpectedQuote(slice.len(), quote)))
1358 }
1359 }
1360 }
1361}
1362
1363////////////////////////////////////////////////////////////////////////////////////////////////////
1364
1365/// Checks, how parsing of XML-style attributes works. Each attribute should
1366/// have a value, enclosed in single or double quotes.
1367#[cfg(test)]
1368mod xml {
1369 use super::*;
1370 use pretty_assertions::assert_eq;
1371
1372 mod attribute_value_normalization {
1373 use super::*;
1374 use crate::errors::Error;
1375 use crate::escape::EscapeError::*;
1376 use crate::XmlVersion::*;
1377 use pretty_assertions::assert_eq;
1378
1379 /// Empty values returned are unchanged
1380 #[test]
1381 fn empty() {
1382 let raw_value = "".as_bytes();
1383 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1384
1385 let value = attr
1386 .decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1387 .unwrap();
1388 assert_eq!(value, "");
1389 // assert_eq! does not check if value is borrowed, but this is important
1390 assert!(matches!(value, Cow::Borrowed(_)));
1391
1392 let value = attr
1393 .decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1394 .unwrap();
1395 assert_eq!(value, "");
1396 // assert_eq! does not check if value is borrowed, but this is important
1397 assert!(matches!(value, Cow::Borrowed(_)));
1398
1399 let value = attr
1400 .decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1401 .unwrap();
1402 assert_eq!(value, "");
1403 // assert_eq! does not check if value is borrowed, but this is important
1404 assert!(matches!(value, Cow::Borrowed(_)));
1405 }
1406
1407 /// Already normalized values are returned unchanged
1408 #[test]
1409 fn already_normalized() {
1410 let raw_value = "foobar123".as_bytes();
1411 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1412
1413 let value = attr
1414 .decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1415 .unwrap();
1416 assert_eq!(value, "foobar123");
1417 // assert_eq! does not check if value is borrowed, but this is important
1418 assert!(matches!(value, Cow::Borrowed(_)));
1419
1420 let value = attr
1421 .decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1422 .unwrap();
1423 assert_eq!(value, "foobar123");
1424 // assert_eq! does not check if value is borrowed, but this is important
1425 assert!(matches!(value, Cow::Borrowed(_)));
1426
1427 let value = attr
1428 .decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1429 .unwrap();
1430 assert_eq!(value, "foobar123");
1431 // assert_eq! does not check if value is borrowed, but this is important
1432 assert!(matches!(value, Cow::Borrowed(_)));
1433 }
1434
1435 /// Return, tab, and newline characters (0xD, 0x9, 0xA) must be substituted with
1436 /// a space character, \r\n and \r\u{85} should be replaced by one space in 1.1
1437 #[test]
1438 fn space_replacement() {
1439 let raw_value = "\r\nfoo\u{85}\u{2028}\rbar\tbaz\n\ndelta\n\r\u{85}".as_bytes();
1440 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1441
1442 assert_eq!(
1443 attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1444 .unwrap(),
1445 " foo\u{85}\u{2028} bar baz delta \u{85}"
1446 );
1447 assert_eq!(
1448 attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1449 .unwrap(),
1450 " foo\u{85}\u{2028} bar baz delta \u{85}"
1451 );
1452 assert_eq!(
1453 attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1454 .unwrap(),
1455 " foo bar baz delta "
1456 );
1457 }
1458
1459 /// Entities must be terminated
1460 #[test]
1461 fn unterminated_entity() {
1462 let raw_value = "abc"def".as_bytes();
1463 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1464
1465 match attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8()) {
1466 Err(Error::Escape(err)) => assert_eq!(err, UnterminatedEntity(3..11)),
1467 x => panic!("Expected Err(Escape(_)), got {:?}", x),
1468 }
1469
1470 match attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8()) {
1471 Err(Error::Escape(err)) => assert_eq!(err, UnterminatedEntity(3..11)),
1472 x => panic!("Expected Err(Escape(_)), got {:?}", x),
1473 }
1474
1475 match attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8()) {
1476 Err(Error::Escape(err)) => assert_eq!(err, UnterminatedEntity(3..11)),
1477 x => panic!("Expected Err(Escape(_)), got {:?}", x),
1478 }
1479 }
1480
1481 /// Unknown entities raise error
1482 #[test]
1483 fn unrecognized_entity() {
1484 let raw_value = "abc&unkn;def".as_bytes();
1485 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1486
1487 match attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8()) {
1488 // TODO: is this divergence between range behavior of UnterminatedEntity
1489 // and UnrecognizedEntity appropriate? existing unescape code behaves the same. (see: start index)
1490 Err(Error::Escape(err)) => {
1491 assert_eq!(err, UnrecognizedEntity(4..8, "unkn".to_owned()))
1492 }
1493 x => panic!("Expected Err(Escape(err)), got {:?}", x),
1494 }
1495 match attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8()) {
1496 // TODO: is this divergence between range behavior of UnterminatedEntity
1497 // and UnrecognizedEntity appropriate? existing unescape code behaves the same. (see: start index)
1498 Err(Error::Escape(err)) => {
1499 assert_eq!(err, UnrecognizedEntity(4..8, "unkn".to_owned()))
1500 }
1501 x => panic!("Expected Err(Escape(err)), got {:?}", x),
1502 }
1503 match attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8()) {
1504 // TODO: is this divergence between range behavior of UnterminatedEntity
1505 // and UnrecognizedEntity appropriate? existing unescape code behaves the same. (see: start index)
1506 Err(Error::Escape(err)) => {
1507 assert_eq!(err, UnrecognizedEntity(4..8, "unkn".to_owned()))
1508 }
1509 x => panic!("Expected Err(Escape(err)), got {:?}", x),
1510 }
1511 }
1512
1513 /// custom entity replacement works, entity replacement text processed recursively
1514 #[test]
1515 fn entity_replacement() {
1516 let raw_value = "&d;&d;A&a; &a;B&da;".as_bytes();
1517 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1518 fn custom_resolver(ent: &str) -> Option<&'static str> {
1519 match ent {
1520 "d" => Some("
"),
1521 "a" => Some("
"),
1522 "da" => Some("
"),
1523 _ => None,
1524 }
1525 }
1526
1527 assert_eq!(
1528 attr.decoded_and_normalized_value_with(
1529 Implicit1_0,
1530 Decoder::utf8(),
1531 5,
1532 &custom_resolver
1533 )
1534 .unwrap(),
1535 "\r\rA\n \nB\r\n"
1536 );
1537 assert_eq!(
1538 attr.decoded_and_normalized_value_with(
1539 Explicit1_0,
1540 Decoder::utf8(),
1541 5,
1542 &custom_resolver
1543 )
1544 .unwrap(),
1545 "\r\rA\n \nB\r\n"
1546 );
1547 assert_eq!(
1548 attr.decoded_and_normalized_value_with(
1549 Explicit1_1,
1550 Decoder::utf8(),
1551 5,
1552 &custom_resolver
1553 )
1554 .unwrap(),
1555 "\r\rA\n \nB\r\n"
1556 );
1557 }
1558
1559 #[test]
1560 fn char_references() {
1561 // character literal references are substituted without being replaced by spaces
1562 let raw_value = "

A

B
".as_bytes();
1563 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1564
1565 assert_eq!(
1566 attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1567 .unwrap(),
1568 "\r\rA\n\nB\r\n"
1569 );
1570 assert_eq!(
1571 attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1572 .unwrap(),
1573 "\r\rA\n\nB\r\n"
1574 );
1575 assert_eq!(
1576 attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1577 .unwrap(),
1578 "\r\rA\n\nB\r\n"
1579 );
1580 }
1581 }
1582
1583 /// Checked attribute is the single attribute
1584 mod single {
1585 use super::*;
1586 use pretty_assertions::assert_eq;
1587
1588 /// Attribute have a value enclosed in single quotes
1589 #[test]
1590 fn single_quoted() {
1591 let mut iter = Attributes::new(r#"tag key='value'"#, 3);
1592
1593 assert_eq!(
1594 iter.next(),
1595 Some(Ok(Attribute {
1596 key: QName(b"key"),
1597 value: Cow::Borrowed(b"value"),
1598 }))
1599 );
1600 assert_eq!(iter.next(), None);
1601 assert_eq!(iter.next(), None);
1602 }
1603
1604 /// Attribute have a value enclosed in double quotes
1605 #[test]
1606 fn double_quoted() {
1607 let mut iter = Attributes::new(r#"tag key="value""#, 3);
1608
1609 assert_eq!(
1610 iter.next(),
1611 Some(Ok(Attribute {
1612 key: QName(b"key"),
1613 value: Cow::Borrowed(b"value"),
1614 }))
1615 );
1616 assert_eq!(iter.next(), None);
1617 assert_eq!(iter.next(), None);
1618 }
1619
1620 /// Attribute have a value, not enclosed in quotes
1621 #[test]
1622 fn unquoted() {
1623 let mut iter = Attributes::new(r#"tag key=value"#, 3);
1624 // 0 ^ = 8
1625
1626 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
1627 assert_eq!(iter.next(), None);
1628 assert_eq!(iter.next(), None);
1629 }
1630
1631 /// Only attribute key is present
1632 #[test]
1633 fn key_only() {
1634 let mut iter = Attributes::new(r#"tag key"#, 3);
1635 // 0 ^ = 7
1636
1637 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(7))));
1638 assert_eq!(iter.next(), None);
1639 assert_eq!(iter.next(), None);
1640 }
1641
1642 /// Key is started with an invalid symbol (a single quote in this test).
1643 /// Because we do not check validity of keys and values during parsing,
1644 /// that invalid attribute will be returned
1645 #[test]
1646 fn key_start_invalid() {
1647 let mut iter = Attributes::new(r#"tag 'key'='value'"#, 3);
1648
1649 assert_eq!(
1650 iter.next(),
1651 Some(Ok(Attribute {
1652 key: QName(b"'key'"),
1653 value: Cow::Borrowed(b"value"),
1654 }))
1655 );
1656 assert_eq!(iter.next(), None);
1657 assert_eq!(iter.next(), None);
1658 }
1659
1660 /// Key contains an invalid symbol (an ampersand in this test).
1661 /// Because we do not check validity of keys and values during parsing,
1662 /// that invalid attribute will be returned
1663 #[test]
1664 fn key_contains_invalid() {
1665 let mut iter = Attributes::new(r#"tag key&jey='value'"#, 3);
1666
1667 assert_eq!(
1668 iter.next(),
1669 Some(Ok(Attribute {
1670 key: QName(b"key&jey"),
1671 value: Cow::Borrowed(b"value"),
1672 }))
1673 );
1674 assert_eq!(iter.next(), None);
1675 assert_eq!(iter.next(), None);
1676 }
1677
1678 /// Attribute value is missing after `=`
1679 #[test]
1680 fn missed_value() {
1681 let mut iter = Attributes::new(r#"tag key="#, 3);
1682 // 0 ^ = 8
1683
1684 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8))));
1685 assert_eq!(iter.next(), None);
1686 assert_eq!(iter.next(), None);
1687 }
1688 }
1689
1690 /// Checked attribute is the first attribute in the list of many attributes
1691 mod first {
1692 use super::*;
1693 use pretty_assertions::assert_eq;
1694
1695 /// Attribute have a value enclosed in single quotes
1696 #[test]
1697 fn single_quoted() {
1698 let mut iter = Attributes::new(r#"tag key='value' regular='attribute'"#, 3);
1699
1700 assert_eq!(
1701 iter.next(),
1702 Some(Ok(Attribute {
1703 key: QName(b"key"),
1704 value: Cow::Borrowed(b"value"),
1705 }))
1706 );
1707 assert_eq!(
1708 iter.next(),
1709 Some(Ok(Attribute {
1710 key: QName(b"regular"),
1711 value: Cow::Borrowed(b"attribute"),
1712 }))
1713 );
1714 assert_eq!(iter.next(), None);
1715 assert_eq!(iter.next(), None);
1716 }
1717
1718 /// Attribute have a value enclosed in double quotes
1719 #[test]
1720 fn double_quoted() {
1721 let mut iter = Attributes::new(r#"tag key="value" regular='attribute'"#, 3);
1722
1723 assert_eq!(
1724 iter.next(),
1725 Some(Ok(Attribute {
1726 key: QName(b"key"),
1727 value: Cow::Borrowed(b"value"),
1728 }))
1729 );
1730 assert_eq!(
1731 iter.next(),
1732 Some(Ok(Attribute {
1733 key: QName(b"regular"),
1734 value: Cow::Borrowed(b"attribute"),
1735 }))
1736 );
1737 assert_eq!(iter.next(), None);
1738 assert_eq!(iter.next(), None);
1739 }
1740
1741 /// Attribute have a value, not enclosed in quotes
1742 #[test]
1743 fn unquoted() {
1744 let mut iter = Attributes::new(r#"tag key=value regular='attribute'"#, 3);
1745 // 0 ^ = 8
1746
1747 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
1748 // check error recovery
1749 assert_eq!(
1750 iter.next(),
1751 Some(Ok(Attribute {
1752 key: QName(b"regular"),
1753 value: Cow::Borrowed(b"attribute"),
1754 }))
1755 );
1756 assert_eq!(iter.next(), None);
1757 assert_eq!(iter.next(), None);
1758 }
1759
1760 /// Only attribute key is present
1761 #[test]
1762 fn key_only() {
1763 let mut iter = Attributes::new(r#"tag key regular='attribute'"#, 3);
1764 // 0 ^ = 8
1765
1766 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
1767 // check error recovery
1768 assert_eq!(
1769 iter.next(),
1770 Some(Ok(Attribute {
1771 key: QName(b"regular"),
1772 value: Cow::Borrowed(b"attribute"),
1773 }))
1774 );
1775 assert_eq!(iter.next(), None);
1776 assert_eq!(iter.next(), None);
1777 }
1778
1779 /// Key is started with an invalid symbol (a single quote in this test).
1780 /// Because we do not check validity of keys and values during parsing,
1781 /// that invalid attribute will be returned
1782 #[test]
1783 fn key_start_invalid() {
1784 let mut iter = Attributes::new(r#"tag 'key'='value' regular='attribute'"#, 3);
1785
1786 assert_eq!(
1787 iter.next(),
1788 Some(Ok(Attribute {
1789 key: QName(b"'key'"),
1790 value: Cow::Borrowed(b"value"),
1791 }))
1792 );
1793 assert_eq!(
1794 iter.next(),
1795 Some(Ok(Attribute {
1796 key: QName(b"regular"),
1797 value: Cow::Borrowed(b"attribute"),
1798 }))
1799 );
1800 assert_eq!(iter.next(), None);
1801 assert_eq!(iter.next(), None);
1802 }
1803
1804 /// Key contains an invalid symbol (an ampersand in this test).
1805 /// Because we do not check validity of keys and values during parsing,
1806 /// that invalid attribute will be returned
1807 #[test]
1808 fn key_contains_invalid() {
1809 let mut iter = Attributes::new(r#"tag key&jey='value' regular='attribute'"#, 3);
1810
1811 assert_eq!(
1812 iter.next(),
1813 Some(Ok(Attribute {
1814 key: QName(b"key&jey"),
1815 value: Cow::Borrowed(b"value"),
1816 }))
1817 );
1818 assert_eq!(
1819 iter.next(),
1820 Some(Ok(Attribute {
1821 key: QName(b"regular"),
1822 value: Cow::Borrowed(b"attribute"),
1823 }))
1824 );
1825 assert_eq!(iter.next(), None);
1826 assert_eq!(iter.next(), None);
1827 }
1828
1829 /// Attribute value is missing after `=`.
1830 #[test]
1831 fn missed_value() {
1832 let mut iter = Attributes::new(r#"tag key= regular='attribute'"#, 3);
1833 // 0 ^ = 9
1834
1835 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1836 // Because we do not check validity of keys and values during parsing,
1837 // "error='recovery'" is considered, as unquoted attribute value and
1838 // skipped during recovery and iteration finished
1839 assert_eq!(iter.next(), None);
1840 assert_eq!(iter.next(), None);
1841
1842 ////////////////////////////////////////////////////////////////////
1843
1844 let mut iter = Attributes::new(r#"tag key= regular= 'attribute'"#, 3);
1845 // 0 ^ = 9 ^ = 29
1846
1847 // In that case "regular=" considered as unquoted value
1848 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1849 // In that case "'attribute'" considered as a key, because we do not check
1850 // validity of key names
1851 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
1852 assert_eq!(iter.next(), None);
1853 assert_eq!(iter.next(), None);
1854
1855 ////////////////////////////////////////////////////////////////////
1856
1857 let mut iter = Attributes::new(r#"tag key= regular ='attribute'"#, 3);
1858 // 0 ^ = 9 ^ = 29
1859
1860 // In that case "regular" considered as unquoted value
1861 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1862 // In that case "='attribute'" considered as a key, because we do not check
1863 // validity of key names
1864 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
1865 assert_eq!(iter.next(), None);
1866 assert_eq!(iter.next(), None);
1867
1868 ////////////////////////////////////////////////////////////////////
1869
1870 let mut iter = Attributes::new(r#"tag key= regular = 'attribute'"#, 3);
1871 // 0 ^ = 9 ^ = 19 ^ = 30
1872
1873 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1874 // In that case second "=" considered as a key, because we do not check
1875 // validity of key names
1876 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(19))));
1877 // In that case "'attribute'" considered as a key, because we do not check
1878 // validity of key names
1879 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(30))));
1880 assert_eq!(iter.next(), None);
1881 assert_eq!(iter.next(), None);
1882 }
1883 }
1884
1885 /// Copy of single, but with additional spaces in markup
1886 mod sparsed {
1887 use super::*;
1888 use pretty_assertions::assert_eq;
1889
1890 /// Attribute have a value enclosed in single quotes
1891 #[test]
1892 fn single_quoted() {
1893 let mut iter = Attributes::new(r#"tag key = 'value' "#, 3);
1894
1895 assert_eq!(
1896 iter.next(),
1897 Some(Ok(Attribute {
1898 key: QName(b"key"),
1899 value: Cow::Borrowed(b"value"),
1900 }))
1901 );
1902 assert_eq!(iter.next(), None);
1903 assert_eq!(iter.next(), None);
1904 }
1905
1906 /// Attribute have a value enclosed in double quotes
1907 #[test]
1908 fn double_quoted() {
1909 let mut iter = Attributes::new(r#"tag key = "value" "#, 3);
1910
1911 assert_eq!(
1912 iter.next(),
1913 Some(Ok(Attribute {
1914 key: QName(b"key"),
1915 value: Cow::Borrowed(b"value"),
1916 }))
1917 );
1918 assert_eq!(iter.next(), None);
1919 assert_eq!(iter.next(), None);
1920 }
1921
1922 /// Attribute have a value, not enclosed in quotes
1923 #[test]
1924 fn unquoted() {
1925 let mut iter = Attributes::new(r#"tag key = value "#, 3);
1926 // 0 ^ = 10
1927
1928 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(10))));
1929 assert_eq!(iter.next(), None);
1930 assert_eq!(iter.next(), None);
1931 }
1932
1933 /// Only attribute key is present
1934 #[test]
1935 fn key_only() {
1936 let mut iter = Attributes::new(r#"tag key "#, 3);
1937 // 0 ^ = 8
1938
1939 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
1940 assert_eq!(iter.next(), None);
1941 assert_eq!(iter.next(), None);
1942 }
1943
1944 /// Key is started with an invalid symbol (a single quote in this test).
1945 /// Because we do not check validity of keys and values during parsing,
1946 /// that invalid attribute will be returned
1947 #[test]
1948 fn key_start_invalid() {
1949 let mut iter = Attributes::new(r#"tag 'key' = 'value' "#, 3);
1950
1951 assert_eq!(
1952 iter.next(),
1953 Some(Ok(Attribute {
1954 key: QName(b"'key'"),
1955 value: Cow::Borrowed(b"value"),
1956 }))
1957 );
1958 assert_eq!(iter.next(), None);
1959 assert_eq!(iter.next(), None);
1960 }
1961
1962 /// Key contains an invalid symbol (an ampersand in this test).
1963 /// Because we do not check validity of keys and values during parsing,
1964 /// that invalid attribute will be returned
1965 #[test]
1966 fn key_contains_invalid() {
1967 let mut iter = Attributes::new(r#"tag key&jey = 'value' "#, 3);
1968
1969 assert_eq!(
1970 iter.next(),
1971 Some(Ok(Attribute {
1972 key: QName(b"key&jey"),
1973 value: Cow::Borrowed(b"value"),
1974 }))
1975 );
1976 assert_eq!(iter.next(), None);
1977 assert_eq!(iter.next(), None);
1978 }
1979
1980 /// Attribute value is missing after `=`
1981 #[test]
1982 fn missed_value() {
1983 let mut iter = Attributes::new(r#"tag key = "#, 3);
1984 // 0 ^ = 10
1985
1986 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10))));
1987 assert_eq!(iter.next(), None);
1988 assert_eq!(iter.next(), None);
1989 }
1990 }
1991
1992 /// Checks that duplicated attributes correctly reported and recovering is
1993 /// possible after that
1994 mod duplicated {
1995 use super::*;
1996
1997 mod with_check {
1998 use super::*;
1999 use pretty_assertions::assert_eq;
2000
2001 /// Attribute have a value enclosed in single quotes
2002 #[test]
2003 fn single_quoted() {
2004 let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
2005 // 0 ^ = 4 ^ = 16
2006
2007 assert_eq!(
2008 iter.next(),
2009 Some(Ok(Attribute {
2010 key: QName(b"key"),
2011 value: Cow::Borrowed(b"value"),
2012 }))
2013 );
2014 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2015 assert_eq!(
2016 iter.next(),
2017 Some(Ok(Attribute {
2018 key: QName(b"another"),
2019 value: Cow::Borrowed(b""),
2020 }))
2021 );
2022 assert_eq!(iter.next(), None);
2023 assert_eq!(iter.next(), None);
2024 }
2025
2026 /// Attribute have a value enclosed in double quotes
2027 #[test]
2028 fn double_quoted() {
2029 let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
2030 // 0 ^ = 4 ^ = 16
2031
2032 assert_eq!(
2033 iter.next(),
2034 Some(Ok(Attribute {
2035 key: QName(b"key"),
2036 value: Cow::Borrowed(b"value"),
2037 }))
2038 );
2039 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2040 assert_eq!(
2041 iter.next(),
2042 Some(Ok(Attribute {
2043 key: QName(b"another"),
2044 value: Cow::Borrowed(b""),
2045 }))
2046 );
2047 assert_eq!(iter.next(), None);
2048 assert_eq!(iter.next(), None);
2049 }
2050
2051 /// Attribute have a value, not enclosed in quotes
2052 #[test]
2053 fn unquoted() {
2054 let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
2055 // 0 ^ = 4 ^ = 16
2056
2057 assert_eq!(
2058 iter.next(),
2059 Some(Ok(Attribute {
2060 key: QName(b"key"),
2061 value: Cow::Borrowed(b"value"),
2062 }))
2063 );
2064 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2065 assert_eq!(
2066 iter.next(),
2067 Some(Ok(Attribute {
2068 key: QName(b"another"),
2069 value: Cow::Borrowed(b""),
2070 }))
2071 );
2072 assert_eq!(iter.next(), None);
2073 assert_eq!(iter.next(), None);
2074 }
2075
2076 /// Only attribute key is present
2077 #[test]
2078 fn key_only() {
2079 let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
2080 // 0 ^ = 20
2081
2082 assert_eq!(
2083 iter.next(),
2084 Some(Ok(Attribute {
2085 key: QName(b"key"),
2086 value: Cow::Borrowed(b"value"),
2087 }))
2088 );
2089 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
2090 assert_eq!(
2091 iter.next(),
2092 Some(Ok(Attribute {
2093 key: QName(b"another"),
2094 value: Cow::Borrowed(b""),
2095 }))
2096 );
2097 assert_eq!(iter.next(), None);
2098 assert_eq!(iter.next(), None);
2099 }
2100
2101 /// Once a start tag declares more than `SMALL_ATTRIBUTE_COUNT`
2102 /// attributes the duplicate check switches to its hash-based path. A
2103 /// duplicate of a name first seen during the earlier linear phase must
2104 /// still be detected, with the original position reported. Regression
2105 /// cover for the cold path of [#969].
2106 ///
2107 /// [#969]: https://github.com/tafia/quick-xml/issues/969
2108 #[test]
2109 fn duplicate_past_hash_threshold() {
2110 let dup = SMALL_ATTRIBUTE_COUNT / 2;
2111 let n = SMALL_ATTRIBUTE_COUNT + 8;
2112
2113 let mut source = String::from("tag");
2114 let mut positions = Vec::with_capacity(n);
2115 for i in 0..n {
2116 source.push(' ');
2117 positions.push(source.len());
2118 source.push_str(&format!("k{:04}=''", i));
2119 }
2120 // Repeat the name first seen at `positions[dup]` (linear phase).
2121 source.push(' ');
2122 let dup_pos = source.len();
2123 source.push_str(&format!("k{:04}=''", dup));
2124
2125 let mut iter = Attributes::new(&source, 3);
2126 for _ in 0..n {
2127 assert!(matches!(iter.next(), Some(Ok(_))));
2128 }
2129 assert_eq!(
2130 iter.next(),
2131 Some(Err(AttrError::Duplicated(dup_pos, positions[dup])))
2132 );
2133 }
2134 }
2135
2136 /// Check for duplicated names is disabled
2137 mod without_check {
2138 use super::*;
2139 use pretty_assertions::assert_eq;
2140
2141 /// Attribute have a value enclosed in single quotes
2142 #[test]
2143 fn single_quoted() {
2144 let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
2145 iter.with_checks(false);
2146
2147 assert_eq!(
2148 iter.next(),
2149 Some(Ok(Attribute {
2150 key: QName(b"key"),
2151 value: Cow::Borrowed(b"value"),
2152 }))
2153 );
2154 assert_eq!(
2155 iter.next(),
2156 Some(Ok(Attribute {
2157 key: QName(b"key"),
2158 value: Cow::Borrowed(b"dup"),
2159 }))
2160 );
2161 assert_eq!(
2162 iter.next(),
2163 Some(Ok(Attribute {
2164 key: QName(b"another"),
2165 value: Cow::Borrowed(b""),
2166 }))
2167 );
2168 assert_eq!(iter.next(), None);
2169 assert_eq!(iter.next(), None);
2170 }
2171
2172 /// Attribute have a value enclosed in double quotes
2173 #[test]
2174 fn double_quoted() {
2175 let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
2176 iter.with_checks(false);
2177
2178 assert_eq!(
2179 iter.next(),
2180 Some(Ok(Attribute {
2181 key: QName(b"key"),
2182 value: Cow::Borrowed(b"value"),
2183 }))
2184 );
2185 assert_eq!(
2186 iter.next(),
2187 Some(Ok(Attribute {
2188 key: QName(b"key"),
2189 value: Cow::Borrowed(b"dup"),
2190 }))
2191 );
2192 assert_eq!(
2193 iter.next(),
2194 Some(Ok(Attribute {
2195 key: QName(b"another"),
2196 value: Cow::Borrowed(b""),
2197 }))
2198 );
2199 assert_eq!(iter.next(), None);
2200 assert_eq!(iter.next(), None);
2201 }
2202
2203 /// Attribute have a value, not enclosed in quotes
2204 #[test]
2205 fn unquoted() {
2206 let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
2207 // 0 ^ = 20
2208 iter.with_checks(false);
2209
2210 assert_eq!(
2211 iter.next(),
2212 Some(Ok(Attribute {
2213 key: QName(b"key"),
2214 value: Cow::Borrowed(b"value"),
2215 }))
2216 );
2217 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(20))));
2218 assert_eq!(
2219 iter.next(),
2220 Some(Ok(Attribute {
2221 key: QName(b"another"),
2222 value: Cow::Borrowed(b""),
2223 }))
2224 );
2225 assert_eq!(iter.next(), None);
2226 assert_eq!(iter.next(), None);
2227 }
2228
2229 /// Only attribute key is present
2230 #[test]
2231 fn key_only() {
2232 let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
2233 // 0 ^ = 20
2234 iter.with_checks(false);
2235
2236 assert_eq!(
2237 iter.next(),
2238 Some(Ok(Attribute {
2239 key: QName(b"key"),
2240 value: Cow::Borrowed(b"value"),
2241 }))
2242 );
2243 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
2244 assert_eq!(
2245 iter.next(),
2246 Some(Ok(Attribute {
2247 key: QName(b"another"),
2248 value: Cow::Borrowed(b""),
2249 }))
2250 );
2251 assert_eq!(iter.next(), None);
2252 assert_eq!(iter.next(), None);
2253 }
2254 }
2255 }
2256
2257 #[test]
2258 fn mixed_quote() {
2259 let mut iter = Attributes::new(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);
2260
2261 assert_eq!(
2262 iter.next(),
2263 Some(Ok(Attribute {
2264 key: QName(b"a"),
2265 value: Cow::Borrowed(b"a"),
2266 }))
2267 );
2268 assert_eq!(
2269 iter.next(),
2270 Some(Ok(Attribute {
2271 key: QName(b"b"),
2272 value: Cow::Borrowed(b"b"),
2273 }))
2274 );
2275 assert_eq!(
2276 iter.next(),
2277 Some(Ok(Attribute {
2278 key: QName(b"c"),
2279 value: Cow::Borrowed(br#"cc"cc"#),
2280 }))
2281 );
2282 assert_eq!(
2283 iter.next(),
2284 Some(Ok(Attribute {
2285 key: QName(b"d"),
2286 value: Cow::Borrowed(b"dd'dd"),
2287 }))
2288 );
2289 assert_eq!(iter.next(), None);
2290 assert_eq!(iter.next(), None);
2291 }
2292}
2293
/// Checks how parsing of HTML-style attributes works. Each attribute can be
/// in one of three forms:
/// - XML-like: has a value enclosed in single or double quotes
/// - has a value that is not enclosed in quotes
/// - without a value, key only
2299#[cfg(test)]
2300mod html {
2301 use super::*;
2302 use pretty_assertions::assert_eq;
2303
2304 /// Checked attribute is the single attribute
2305 mod single {
2306 use super::*;
2307 use pretty_assertions::assert_eq;
2308
2309 /// Attribute have a value enclosed in single quotes
2310 #[test]
2311 fn single_quoted() {
2312 let mut iter = Attributes::html(r#"tag key='value'"#, 3);
2313
2314 assert_eq!(
2315 iter.next(),
2316 Some(Ok(Attribute {
2317 key: QName(b"key"),
2318 value: Cow::Borrowed(b"value"),
2319 }))
2320 );
2321 assert_eq!(iter.next(), None);
2322 assert_eq!(iter.next(), None);
2323 }
2324
2325 /// Attribute have a value enclosed in double quotes
2326 #[test]
2327 fn double_quoted() {
2328 let mut iter = Attributes::html(r#"tag key="value""#, 3);
2329
2330 assert_eq!(
2331 iter.next(),
2332 Some(Ok(Attribute {
2333 key: QName(b"key"),
2334 value: Cow::Borrowed(b"value"),
2335 }))
2336 );
2337 assert_eq!(iter.next(), None);
2338 assert_eq!(iter.next(), None);
2339 }
2340
2341 /// Attribute have a value, not enclosed in quotes
2342 #[test]
2343 fn unquoted() {
2344 let mut iter = Attributes::html(r#"tag key=value"#, 3);
2345
2346 assert_eq!(
2347 iter.next(),
2348 Some(Ok(Attribute {
2349 key: QName(b"key"),
2350 value: Cow::Borrowed(b"value"),
2351 }))
2352 );
2353 assert_eq!(iter.next(), None);
2354 assert_eq!(iter.next(), None);
2355 }
2356
2357 /// Only attribute key is present
2358 #[test]
2359 fn key_only() {
2360 let mut iter = Attributes::html(r#"tag key"#, 3);
2361
2362 assert_eq!(
2363 iter.next(),
2364 Some(Ok(Attribute {
2365 key: QName(b"key"),
2366 value: Cow::Borrowed(&[]),
2367 }))
2368 );
2369 assert_eq!(iter.next(), None);
2370 assert_eq!(iter.next(), None);
2371 }
2372
2373 /// Key is started with an invalid symbol (a single quote in this test).
2374 /// Because we do not check validity of keys and values during parsing,
2375 /// that invalid attribute will be returned
2376 #[test]
2377 fn key_start_invalid() {
2378 let mut iter = Attributes::html(r#"tag 'key'='value'"#, 3);
2379
2380 assert_eq!(
2381 iter.next(),
2382 Some(Ok(Attribute {
2383 key: QName(b"'key'"),
2384 value: Cow::Borrowed(b"value"),
2385 }))
2386 );
2387 assert_eq!(iter.next(), None);
2388 assert_eq!(iter.next(), None);
2389 }
2390
2391 /// Key contains an invalid symbol (an ampersand in this test).
2392 /// Because we do not check validity of keys and values during parsing,
2393 /// that invalid attribute will be returned
2394 #[test]
2395 fn key_contains_invalid() {
2396 let mut iter = Attributes::html(r#"tag key&jey='value'"#, 3);
2397
2398 assert_eq!(
2399 iter.next(),
2400 Some(Ok(Attribute {
2401 key: QName(b"key&jey"),
2402 value: Cow::Borrowed(b"value"),
2403 }))
2404 );
2405 assert_eq!(iter.next(), None);
2406 assert_eq!(iter.next(), None);
2407 }
2408
2409 /// Attribute value is missing after `=`
2410 #[test]
2411 fn missed_value() {
2412 let mut iter = Attributes::html(r#"tag key="#, 3);
2413 // 0 ^ = 8
2414
2415 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8))));
2416 assert_eq!(iter.next(), None);
2417 assert_eq!(iter.next(), None);
2418 }
2419 }
2420
2421 /// Checked attribute is the first attribute in the list of many attributes
2422 mod first {
2423 use super::*;
2424 use pretty_assertions::assert_eq;
2425
2426 /// Attribute have a value enclosed in single quotes
2427 #[test]
2428 fn single_quoted() {
2429 let mut iter = Attributes::html(r#"tag key='value' regular='attribute'"#, 3);
2430
2431 assert_eq!(
2432 iter.next(),
2433 Some(Ok(Attribute {
2434 key: QName(b"key"),
2435 value: Cow::Borrowed(b"value"),
2436 }))
2437 );
2438 assert_eq!(
2439 iter.next(),
2440 Some(Ok(Attribute {
2441 key: QName(b"regular"),
2442 value: Cow::Borrowed(b"attribute"),
2443 }))
2444 );
2445 assert_eq!(iter.next(), None);
2446 assert_eq!(iter.next(), None);
2447 }
2448
2449 /// Attribute have a value enclosed in double quotes
2450 #[test]
2451 fn double_quoted() {
2452 let mut iter = Attributes::html(r#"tag key="value" regular='attribute'"#, 3);
2453
2454 assert_eq!(
2455 iter.next(),
2456 Some(Ok(Attribute {
2457 key: QName(b"key"),
2458 value: Cow::Borrowed(b"value"),
2459 }))
2460 );
2461 assert_eq!(
2462 iter.next(),
2463 Some(Ok(Attribute {
2464 key: QName(b"regular"),
2465 value: Cow::Borrowed(b"attribute"),
2466 }))
2467 );
2468 assert_eq!(iter.next(), None);
2469 assert_eq!(iter.next(), None);
2470 }
2471
2472 /// Attribute have a value, not enclosed in quotes
2473 #[test]
2474 fn unquoted() {
2475 let mut iter = Attributes::html(r#"tag key=value regular='attribute'"#, 3);
2476
2477 assert_eq!(
2478 iter.next(),
2479 Some(Ok(Attribute {
2480 key: QName(b"key"),
2481 value: Cow::Borrowed(b"value"),
2482 }))
2483 );
2484 assert_eq!(
2485 iter.next(),
2486 Some(Ok(Attribute {
2487 key: QName(b"regular"),
2488 value: Cow::Borrowed(b"attribute"),
2489 }))
2490 );
2491 assert_eq!(iter.next(), None);
2492 assert_eq!(iter.next(), None);
2493 }
2494
2495 /// Only attribute key is present
2496 #[test]
2497 fn key_only() {
2498 let mut iter = Attributes::html(r#"tag key regular='attribute'"#, 3);
2499
2500 assert_eq!(
2501 iter.next(),
2502 Some(Ok(Attribute {
2503 key: QName(b"key"),
2504 value: Cow::Borrowed(&[]),
2505 }))
2506 );
2507 assert_eq!(
2508 iter.next(),
2509 Some(Ok(Attribute {
2510 key: QName(b"regular"),
2511 value: Cow::Borrowed(b"attribute"),
2512 }))
2513 );
2514 assert_eq!(iter.next(), None);
2515 assert_eq!(iter.next(), None);
2516 }
2517
2518 /// Key is started with an invalid symbol (a single quote in this test).
2519 /// Because we do not check validity of keys and values during parsing,
2520 /// that invalid attribute will be returned
2521 #[test]
2522 fn key_start_invalid() {
2523 let mut iter = Attributes::html(r#"tag 'key'='value' regular='attribute'"#, 3);
2524
2525 assert_eq!(
2526 iter.next(),
2527 Some(Ok(Attribute {
2528 key: QName(b"'key'"),
2529 value: Cow::Borrowed(b"value"),
2530 }))
2531 );
2532 assert_eq!(
2533 iter.next(),
2534 Some(Ok(Attribute {
2535 key: QName(b"regular"),
2536 value: Cow::Borrowed(b"attribute"),
2537 }))
2538 );
2539 assert_eq!(iter.next(), None);
2540 assert_eq!(iter.next(), None);
2541 }
2542
2543 /// Key contains an invalid symbol (an ampersand in this test).
2544 /// Because we do not check validity of keys and values during parsing,
2545 /// that invalid attribute will be returned
2546 #[test]
2547 fn key_contains_invalid() {
2548 let mut iter = Attributes::html(r#"tag key&jey='value' regular='attribute'"#, 3);
2549
2550 assert_eq!(
2551 iter.next(),
2552 Some(Ok(Attribute {
2553 key: QName(b"key&jey"),
2554 value: Cow::Borrowed(b"value"),
2555 }))
2556 );
2557 assert_eq!(
2558 iter.next(),
2559 Some(Ok(Attribute {
2560 key: QName(b"regular"),
2561 value: Cow::Borrowed(b"attribute"),
2562 }))
2563 );
2564 assert_eq!(iter.next(), None);
2565 assert_eq!(iter.next(), None);
2566 }
2567
2568 /// Attribute value is missing after `=`
2569 #[test]
2570 fn missed_value() {
2571 let mut iter = Attributes::html(r#"tag key= regular='attribute'"#, 3);
2572
2573 // Because we do not check validity of keys and values during parsing,
2574 // "regular='attribute'" is considered as unquoted attribute value
2575 assert_eq!(
2576 iter.next(),
2577 Some(Ok(Attribute {
2578 key: QName(b"key"),
2579 value: Cow::Borrowed(b"regular='attribute'"),
2580 }))
2581 );
2582 assert_eq!(iter.next(), None);
2583 assert_eq!(iter.next(), None);
2584
2585 ////////////////////////////////////////////////////////////////////
2586
2587 let mut iter = Attributes::html(r#"tag key= regular= 'attribute'"#, 3);
2588
2589 // Because we do not check validity of keys and values during parsing,
2590 // "regular=" is considered as unquoted attribute value
2591 assert_eq!(
2592 iter.next(),
2593 Some(Ok(Attribute {
2594 key: QName(b"key"),
2595 value: Cow::Borrowed(b"regular="),
2596 }))
2597 );
2598 // Because we do not check validity of keys and values during parsing,
2599 // "'attribute'" is considered as key-only attribute
2600 assert_eq!(
2601 iter.next(),
2602 Some(Ok(Attribute {
2603 key: QName(b"'attribute'"),
2604 value: Cow::Borrowed(&[]),
2605 }))
2606 );
2607 assert_eq!(iter.next(), None);
2608 assert_eq!(iter.next(), None);
2609
2610 ////////////////////////////////////////////////////////////////////
2611
2612 let mut iter = Attributes::html(r#"tag key= regular ='attribute'"#, 3);
2613
2614 // Because we do not check validity of keys and values during parsing,
2615 // "regular" is considered as unquoted attribute value
2616 assert_eq!(
2617 iter.next(),
2618 Some(Ok(Attribute {
2619 key: QName(b"key"),
2620 value: Cow::Borrowed(b"regular"),
2621 }))
2622 );
2623 // Because we do not check validity of keys and values during parsing,
2624 // "='attribute'" is considered as key-only attribute
2625 assert_eq!(
2626 iter.next(),
2627 Some(Ok(Attribute {
2628 key: QName(b"='attribute'"),
2629 value: Cow::Borrowed(&[]),
2630 }))
2631 );
2632 assert_eq!(iter.next(), None);
2633 assert_eq!(iter.next(), None);
2634
2635 ////////////////////////////////////////////////////////////////////
2636
2637 let mut iter = Attributes::html(r#"tag key= regular = 'attribute'"#, 3);
2638 // 0 ^ = 9 ^ = 19 ^ = 30
2639
2640 // Because we do not check validity of keys and values during parsing,
2641 // "regular" is considered as unquoted attribute value
2642 assert_eq!(
2643 iter.next(),
2644 Some(Ok(Attribute {
2645 key: QName(b"key"),
2646 value: Cow::Borrowed(b"regular"),
2647 }))
2648 );
2649 // Because we do not check validity of keys and values during parsing,
2650 // "=" is considered as key-only attribute
2651 assert_eq!(
2652 iter.next(),
2653 Some(Ok(Attribute {
2654 key: QName(b"="),
2655 value: Cow::Borrowed(&[]),
2656 }))
2657 );
2658 // Because we do not check validity of keys and values during parsing,
2659 // "'attribute'" is considered as key-only attribute
2660 assert_eq!(
2661 iter.next(),
2662 Some(Ok(Attribute {
2663 key: QName(b"'attribute'"),
2664 value: Cow::Borrowed(&[]),
2665 }))
2666 );
2667 assert_eq!(iter.next(), None);
2668 assert_eq!(iter.next(), None);
2669 }
2670 }
2671
2672 /// Copy of single, but with additional spaces in markup
2673 mod sparsed {
2674 use super::*;
2675 use pretty_assertions::assert_eq;
2676
2677 /// Attribute have a value enclosed in single quotes
2678 #[test]
2679 fn single_quoted() {
2680 let mut iter = Attributes::html(r#"tag key = 'value' "#, 3);
2681
2682 assert_eq!(
2683 iter.next(),
2684 Some(Ok(Attribute {
2685 key: QName(b"key"),
2686 value: Cow::Borrowed(b"value"),
2687 }))
2688 );
2689 assert_eq!(iter.next(), None);
2690 assert_eq!(iter.next(), None);
2691 }
2692
2693 /// Attribute have a value enclosed in double quotes
2694 #[test]
2695 fn double_quoted() {
2696 let mut iter = Attributes::html(r#"tag key = "value" "#, 3);
2697
2698 assert_eq!(
2699 iter.next(),
2700 Some(Ok(Attribute {
2701 key: QName(b"key"),
2702 value: Cow::Borrowed(b"value"),
2703 }))
2704 );
2705 assert_eq!(iter.next(), None);
2706 assert_eq!(iter.next(), None);
2707 }
2708
2709 /// Attribute have a value, not enclosed in quotes
2710 #[test]
2711 fn unquoted() {
2712 let mut iter = Attributes::html(r#"tag key = value "#, 3);
2713
2714 assert_eq!(
2715 iter.next(),
2716 Some(Ok(Attribute {
2717 key: QName(b"key"),
2718 value: Cow::Borrowed(b"value"),
2719 }))
2720 );
2721 assert_eq!(iter.next(), None);
2722 assert_eq!(iter.next(), None);
2723 }
2724
2725 /// Only attribute key is present
2726 #[test]
2727 fn key_only() {
2728 let mut iter = Attributes::html(r#"tag key "#, 3);
2729
2730 assert_eq!(
2731 iter.next(),
2732 Some(Ok(Attribute {
2733 key: QName(b"key"),
2734 value: Cow::Borrowed(&[]),
2735 }))
2736 );
2737 assert_eq!(iter.next(), None);
2738 assert_eq!(iter.next(), None);
2739 }
2740
2741 /// Key is started with an invalid symbol (a single quote in this test).
2742 /// Because we do not check validity of keys and values during parsing,
2743 /// that invalid attribute will be returned
2744 #[test]
2745 fn key_start_invalid() {
2746 let mut iter = Attributes::html(r#"tag 'key' = 'value' "#, 3);
2747
2748 assert_eq!(
2749 iter.next(),
2750 Some(Ok(Attribute {
2751 key: QName(b"'key'"),
2752 value: Cow::Borrowed(b"value"),
2753 }))
2754 );
2755 assert_eq!(iter.next(), None);
2756 assert_eq!(iter.next(), None);
2757 }
2758
2759 /// Key contains an invalid symbol (an ampersand in this test).
2760 /// Because we do not check validity of keys and values during parsing,
2761 /// that invalid attribute will be returned
2762 #[test]
2763 fn key_contains_invalid() {
2764 let mut iter = Attributes::html(r#"tag key&jey = 'value' "#, 3);
2765
2766 assert_eq!(
2767 iter.next(),
2768 Some(Ok(Attribute {
2769 key: QName(b"key&jey"),
2770 value: Cow::Borrowed(b"value"),
2771 }))
2772 );
2773 assert_eq!(iter.next(), None);
2774 assert_eq!(iter.next(), None);
2775 }
2776
2777 /// Attribute value is missing after `=`
2778 #[test]
2779 fn missed_value() {
2780 let mut iter = Attributes::html(r#"tag key = "#, 3);
2781 // 0 ^ = 10
2782
2783 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10))));
2784 assert_eq!(iter.next(), None);
2785 assert_eq!(iter.next(), None);
2786 }
2787 }
2788
2789 /// Checks that duplicated attributes correctly reported and recovering is
2790 /// possible after that
2791 mod duplicated {
2792 use super::*;
2793
2794 mod with_check {
2795 use super::*;
2796 use pretty_assertions::assert_eq;
2797
2798 /// Attribute have a value enclosed in single quotes
2799 #[test]
2800 fn single_quoted() {
2801 let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
2802 // 0 ^ = 4 ^ = 16
2803
2804 assert_eq!(
2805 iter.next(),
2806 Some(Ok(Attribute {
2807 key: QName(b"key"),
2808 value: Cow::Borrowed(b"value"),
2809 }))
2810 );
2811 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2812 assert_eq!(
2813 iter.next(),
2814 Some(Ok(Attribute {
2815 key: QName(b"another"),
2816 value: Cow::Borrowed(b""),
2817 }))
2818 );
2819 assert_eq!(iter.next(), None);
2820 assert_eq!(iter.next(), None);
2821 }
2822
2823 /// Attribute have a value enclosed in double quotes
2824 #[test]
2825 fn double_quoted() {
2826 let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
2827 // 0 ^ = 4 ^ = 16
2828
2829 assert_eq!(
2830 iter.next(),
2831 Some(Ok(Attribute {
2832 key: QName(b"key"),
2833 value: Cow::Borrowed(b"value"),
2834 }))
2835 );
2836 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2837 assert_eq!(
2838 iter.next(),
2839 Some(Ok(Attribute {
2840 key: QName(b"another"),
2841 value: Cow::Borrowed(b""),
2842 }))
2843 );
2844 assert_eq!(iter.next(), None);
2845 assert_eq!(iter.next(), None);
2846 }
2847
2848 /// Attribute have a value, not enclosed in quotes
2849 #[test]
2850 fn unquoted() {
2851 let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
2852 // 0 ^ = 4 ^ = 16
2853
2854 assert_eq!(
2855 iter.next(),
2856 Some(Ok(Attribute {
2857 key: QName(b"key"),
2858 value: Cow::Borrowed(b"value"),
2859 }))
2860 );
2861 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2862 assert_eq!(
2863 iter.next(),
2864 Some(Ok(Attribute {
2865 key: QName(b"another"),
2866 value: Cow::Borrowed(b""),
2867 }))
2868 );
2869 assert_eq!(iter.next(), None);
2870 assert_eq!(iter.next(), None);
2871 }
2872
2873 /// Only attribute key is present
2874 #[test]
2875 fn key_only() {
2876 let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3);
2877 // 0 ^ = 4 ^ = 16
2878
2879 assert_eq!(
2880 iter.next(),
2881 Some(Ok(Attribute {
2882 key: QName(b"key"),
2883 value: Cow::Borrowed(b"value"),
2884 }))
2885 );
2886 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2887 assert_eq!(
2888 iter.next(),
2889 Some(Ok(Attribute {
2890 key: QName(b"another"),
2891 value: Cow::Borrowed(b""),
2892 }))
2893 );
2894 assert_eq!(iter.next(), None);
2895 assert_eq!(iter.next(), None);
2896 }
2897 }
2898
2899 /// Check for duplicated names is disabled
2900 mod without_check {
2901 use super::*;
2902 use pretty_assertions::assert_eq;
2903
2904 /// Attribute have a value enclosed in single quotes
2905 #[test]
2906 fn single_quoted() {
2907 let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
2908 iter.with_checks(false);
2909
2910 assert_eq!(
2911 iter.next(),
2912 Some(Ok(Attribute {
2913 key: QName(b"key"),
2914 value: Cow::Borrowed(b"value"),
2915 }))
2916 );
2917 assert_eq!(
2918 iter.next(),
2919 Some(Ok(Attribute {
2920 key: QName(b"key"),
2921 value: Cow::Borrowed(b"dup"),
2922 }))
2923 );
2924 assert_eq!(
2925 iter.next(),
2926 Some(Ok(Attribute {
2927 key: QName(b"another"),
2928 value: Cow::Borrowed(b""),
2929 }))
2930 );
2931 assert_eq!(iter.next(), None);
2932 assert_eq!(iter.next(), None);
2933 }
2934
2935 /// Attribute have a value enclosed in double quotes
2936 #[test]
2937 fn double_quoted() {
2938 let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
2939 iter.with_checks(false);
2940
2941 assert_eq!(
2942 iter.next(),
2943 Some(Ok(Attribute {
2944 key: QName(b"key"),
2945 value: Cow::Borrowed(b"value"),
2946 }))
2947 );
2948 assert_eq!(
2949 iter.next(),
2950 Some(Ok(Attribute {
2951 key: QName(b"key"),
2952 value: Cow::Borrowed(b"dup"),
2953 }))
2954 );
2955 assert_eq!(
2956 iter.next(),
2957 Some(Ok(Attribute {
2958 key: QName(b"another"),
2959 value: Cow::Borrowed(b""),
2960 }))
2961 );
2962 assert_eq!(iter.next(), None);
2963 assert_eq!(iter.next(), None);
2964 }
2965
2966 /// Attribute have a value, not enclosed in quotes
2967 #[test]
2968 fn unquoted() {
2969 let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
2970 iter.with_checks(false);
2971
2972 assert_eq!(
2973 iter.next(),
2974 Some(Ok(Attribute {
2975 key: QName(b"key"),
2976 value: Cow::Borrowed(b"value"),
2977 }))
2978 );
2979 assert_eq!(
2980 iter.next(),
2981 Some(Ok(Attribute {
2982 key: QName(b"key"),
2983 value: Cow::Borrowed(b"dup"),
2984 }))
2985 );
2986 assert_eq!(
2987 iter.next(),
2988 Some(Ok(Attribute {
2989 key: QName(b"another"),
2990 value: Cow::Borrowed(b""),
2991 }))
2992 );
2993 assert_eq!(iter.next(), None);
2994 assert_eq!(iter.next(), None);
2995 }
2996
2997 /// Only attribute key is present
2998 #[test]
2999 fn key_only() {
3000 let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3);
3001 iter.with_checks(false);
3002
3003 assert_eq!(
3004 iter.next(),
3005 Some(Ok(Attribute {
3006 key: QName(b"key"),
3007 value: Cow::Borrowed(b"value"),
3008 }))
3009 );
3010 assert_eq!(
3011 iter.next(),
3012 Some(Ok(Attribute {
3013 key: QName(b"key"),
3014 value: Cow::Borrowed(&[]),
3015 }))
3016 );
3017 assert_eq!(
3018 iter.next(),
3019 Some(Ok(Attribute {
3020 key: QName(b"another"),
3021 value: Cow::Borrowed(b""),
3022 }))
3023 );
3024 assert_eq!(iter.next(), None);
3025 assert_eq!(iter.next(), None);
3026 }
3027 }
3028 }
3029
3030 #[test]
3031 fn mixed_quote() {
3032 let mut iter = Attributes::html(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);
3033
3034 assert_eq!(
3035 iter.next(),
3036 Some(Ok(Attribute {
3037 key: QName(b"a"),
3038 value: Cow::Borrowed(b"a"),
3039 }))
3040 );
3041 assert_eq!(
3042 iter.next(),
3043 Some(Ok(Attribute {
3044 key: QName(b"b"),
3045 value: Cow::Borrowed(b"b"),
3046 }))
3047 );
3048 assert_eq!(
3049 iter.next(),
3050 Some(Ok(Attribute {
3051 key: QName(b"c"),
3052 value: Cow::Borrowed(br#"cc"cc"#),
3053 }))
3054 );
3055 assert_eq!(
3056 iter.next(),
3057 Some(Ok(Attribute {
3058 key: QName(b"d"),
3059 value: Cow::Borrowed(b"dd'dd"),
3060 }))
3061 );
3062 assert_eq!(iter.next(), None);
3063 assert_eq!(iter.next(), None);
3064 }
3065}