quick_xml/events/attributes.rs
1//! Xml Attributes module
2//!
3//! Provides an iterator over attributes key/value pairs
4
5use crate::encoding::Decoder;
6use crate::errors::Result as XmlResult;
7use crate::escape::{escape, resolve_predefined_entity};
8use crate::name::{LocalName, Namespace, NamespaceResolver, QName};
9use crate::utils::{is_whitespace, Bytes};
10use crate::XmlVersion;
11
12use std::fmt::{self, Debug, Display, Formatter};
13use std::iter::FusedIterator;
14use std::{borrow::Cow, ops::Range};
15
/// A struct representing a key/value XML attribute.
///
/// Field `value` stores raw bytes, possibly containing escape-sequences. Most users will likely
/// want to access the value using one of the [`normalized_value`] and [`decoded_and_normalized_value`]
/// functions.
///
/// [`normalized_value`]: Self::normalized_value
/// [`decoded_and_normalized_value`]: Self::decoded_and_normalized_value
#[derive(Clone, Eq, PartialEq)]
pub struct Attribute<'a> {
    /// The key to uniquely define the attribute.
    ///
    /// If [`Attributes::with_checks`] is turned off, the key might not be unique.
    pub key: QName<'a>,
    /// The raw value of the attribute.
    ///
    /// The bytes may still contain escape sequences (see the type-level documentation)
    /// and are either borrowed for `'a` or owned by the attribute.
    pub value: Cow<'a, [u8]>,
}
33
34impl<'a> Attribute<'a> {
35 /// Returns the attribute value normalized as per [the XML specification] (or [for 1.0]).
36 ///
37 /// The document **must** be UTF-8 encoded, or pre-processed using [`DecodingReader`].
38 ///
39 /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
40 ///
41 /// The following escape sequences are replaced with their unescaped equivalents:
42 ///
43 /// | Escape Sequence | Replacement
44 /// |-----------------|------------
45 /// | `<` | `<`
46 /// | `>` | `>`
47 /// | `&` | `&`
48 /// | `'` | `'`
49 /// | `"` | `"`
50 ///
51 /// This will allocate unless the raw attribute value does not require normalization.
52 ///
53 /// Note, although you may use this library to parse HTML, you cannot use this
54 /// method to get HTML content, because its returns normalized value: the following
55 /// sequences are translated into a single space (U+0020) character:
56 ///
57 /// - `\r\n`
58 /// - `\r\x85` (only XML 1.1)
59 /// - `\r`
60 /// - `\n`
61 /// - `\t`
62 /// - `\x85` (only XML 1.1)
63 /// - `\x2028` (only XML 1.1)
64 ///
65 /// The text in HTML normally is not normalized in any way; normalization is
66 /// performed only in limited contexts and [only for] `\r\n` and `\r`.
67 ///
68 /// See also [`normalized_value_with()`](Self::normalized_value_with).
69 ///
70 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
71 ///
72 /// NOTE: If you are using this in a context where the input is not controlled,
73 /// it is preferred to wrap the input stream in [`DecodingReader`] or to use
74 /// [`decoded_and_normalized_value()`](Self::decoded_and_normalized_value) instead.
75 ///
76 /// </div>
77 ///
78 /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
79 /// [`DecodingReader`]: ../../encoding/struct.DecodingReader.html
80 /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
81 /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
82 pub fn normalized_value(&self, version: XmlVersion) -> XmlResult<Cow<'a, str>> {
83 // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
84 self.normalized_value_with(version, 1, resolve_predefined_entity)
85 }
86
87 /// Returns the attribute value normalized as per [the XML specification] (or [for 1.0]),
88 /// using a custom entity resolver.
89 ///
90 /// The document **must** be UTF-8 encoded, or pre-processed using [`DecodingReader`].
91 ///
92 /// Do not use this method with HTML attributes.
93 ///
94 /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
95 ///
96 /// A function for resolving entities can be provided as `resolve_entity`.
97 /// This method does not resolve any predefined entities, but you can use
98 /// [`resolve_predefined_entity`] in your function.
99 ///
100 /// This will allocate unless the raw attribute value does not require normalization.
101 ///
102 /// Note, although you may use this library to parse HTML, you cannot use this
103 /// method to get HTML content, because its returns normalized value: the following
104 /// sequences are translated into a single space (U+0020) character:
105 ///
106 /// - `\r\n`
107 /// - `\r\x85` (only XML 1.1)
108 /// - `\r`
109 /// - `\n`
110 /// - `\t`
111 /// - `\x85` (only XML 1.1)
112 /// - `\x2028` (only XML 1.1)
113 ///
114 /// The text in HTML normally is not normalized in any way; normalization is
115 /// performed only in limited contexts and [only for] `\r\n` and `\r`.
116 ///
117 /// See also [`normalized_value()`](Self::normalized_value).
118 ///
119 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
120 ///
121 /// NOTE: If you are using this in a context where the input is not controlled,
122 /// it is preferred to wrap the input stream in [`DecodingReader`] or to use
123 /// [`decoded_and_normalized_value_with()`](Self::decoded_and_normalized_value_with) instead.
124 ///
125 /// </div>
126 ///
127 /// # Parameters
128 ///
129 /// - `depth`: maximum number of nested entities that can be expanded. If expansion
130 /// chain will be more that this value, the function will return [`EscapeError::TooManyNestedEntities`]
131 /// - `resolve_entity`: a function to resolve entity. This function could be called
132 /// multiple times on the same input and can return different values in each case
133 /// for the same input, although it is not recommended
134 ///
135 /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
136 /// [`DecodingReader`]: ../../encoding/struct.DecodingReader.html
137 /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
138 /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
139 /// [`EscapeError::TooManyNestedEntities`]: crate::escape::EscapeError::TooManyNestedEntities
140 pub fn normalized_value_with<'entity>(
141 &self,
142 version: XmlVersion,
143 depth: usize,
144 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
145 ) -> XmlResult<Cow<'a, str>> {
146 use crate::encoding::EncodingError;
147 use std::str::from_utf8;
148
149 let decoded = match &self.value {
150 Cow::Borrowed(bytes) => Cow::Borrowed(from_utf8(bytes).map_err(EncodingError::Utf8)?),
151 // Convert to owned, because otherwise Cow will be bound with wrong lifetime
152 Cow::Owned(bytes) => {
153 Cow::Owned(from_utf8(bytes).map_err(EncodingError::Utf8)?.to_owned())
154 }
155 };
156
157 match version.normalize_attribute_value(&decoded, depth, resolve_entity)? {
158 // Because result is borrowed, no replacements was done and we can use original string
159 Cow::Borrowed(_) => Ok(decoded),
160 Cow::Owned(s) => Ok(s.into()),
161 }
162 }
163
164 /// Decodes using a provided reader and returns the attribute value normalized
165 /// as per [the XML specification] (or [for 1.0]).
166 ///
167 /// Do not use this method with HTML attributes.
168 ///
169 /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
170 ///
171 /// The following escape sequences are replaced with their unescaped equivalents:
172 ///
173 /// | Escape Sequence | Replacement
174 /// |-----------------|------------
175 /// | `<` | `<`
176 /// | `>` | `>`
177 /// | `&` | `&`
178 /// | `'` | `'`
179 /// | `"` | `"`
180 ///
181 /// This will allocate unless the raw attribute value does not require normalization.
182 ///
183 /// Note, although you may use this library to parse HTML, you cannot use this
184 /// method to get HTML content, because its returns normalized value: the following
185 /// sequences are translated into a single space (U+0020) character:
186 ///
187 /// - `\r\n`
188 /// - `\r\x85` (only XML 1.1)
189 /// - `\r`
190 /// - `\n`
191 /// - `\t`
192 /// - `\x85` (only XML 1.1)
193 /// - `\x2028` (only XML 1.1)
194 ///
195 /// The text in HTML normally is not normalized in any way; normalization is
196 /// performed only in limited contexts and [only for] `\r\n` and `\r`.
197 ///
198 /// See also [`decoded_and_normalized_value_with()`](#method.decoded_and_normalized_value_with)
199 ///
200 /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
201 /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
202 /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
203 pub fn decoded_and_normalized_value(
204 &self,
205 version: XmlVersion,
206 decoder: Decoder,
207 ) -> XmlResult<Cow<'a, str>> {
208 // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
209 self.decoded_and_normalized_value_with(version, decoder, 1, resolve_predefined_entity)
210 }
211
212 /// Decodes using a provided reader and returns the attribute value normalized
213 /// as per [the XML specification] (or [for 1.0]), using a custom entity resolver.
214 ///
215 /// Do not use this method with HTML attributes.
216 ///
217 /// The characters `\t`, `\r`, `\n` are replaced with whitespace characters (`0x20`).
218 ///
219 /// A function for resolving entities can be provided as `resolve_entity`.
220 /// This method does not resolve any predefined entities, but you can use
221 /// [`resolve_predefined_entity`] in your function.
222 ///
223 /// This will allocate unless the raw attribute value does not require normalization.
224 ///
225 /// Note, although you may use this library to parse HTML, you cannot use this
226 /// method to get HTML content, because its returns normalized value: the following
227 /// sequences are translated into a single space (U+0020) character:
228 ///
229 /// - `\r\n`
230 /// - `\r\x85` (only XML 1.1)
231 /// - `\r`
232 /// - `\n`
233 /// - `\t`
234 /// - `\x85` (only XML 1.1)
235 /// - `\x2028` (only XML 1.1)
236 ///
237 /// The text in HTML normally is not normalized in any way; normalization is
238 /// performed only in limited contexts and [only for] `\r\n` and `\r`.
239 ///
240 /// See also [`decoded_and_normalized_value()`](#method.decoded_and_normalized_value)
241 ///
242 /// # Parameters
243 ///
244 /// - `depth`: maximum number of nested entities that can be expanded. If expansion
245 /// chain will be more that this value, the function will return [`EscapeError::TooManyNestedEntities`]
246 /// - `resolve_entity`: a function to resolve entity. This function could be called
247 /// multiple times on the same input and can return different values in each case
248 /// for the same input, although it is not recommended
249 ///
250 /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize
251 /// [for 1.0]: https://www.w3.org/TR/xml/#AVNormalize
252 /// [only for]: https://html.spec.whatwg.org/#normalize-newlines
253 /// [`EscapeError::TooManyNestedEntities`]: crate::escape::EscapeError::TooManyNestedEntities
254 pub fn decoded_and_normalized_value_with<'entity>(
255 &self,
256 version: XmlVersion,
257 decoder: Decoder,
258 depth: usize,
259 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
260 ) -> XmlResult<Cow<'a, str>> {
261 let decoded = match &self.value {
262 Cow::Borrowed(bytes) => decoder.decode(bytes)?,
263 // Convert to owned, because otherwise Cow will be bound with wrong lifetime
264 Cow::Owned(bytes) => decoder.decode(bytes)?.into_owned().into(),
265 };
266
267 match version.normalize_attribute_value(&decoded, depth, resolve_entity)? {
268 // Because result is borrowed, no replacements was done and we can use original string
269 Cow::Borrowed(_) => Ok(decoded),
270 Cow::Owned(s) => Ok(s.into()),
271 }
272 }
273
274 /// Returns the unescaped value.
275 ///
276 /// This is normally the value you are interested in. Escape sequences such as `>` are
277 /// replaced with their unescaped equivalents such as `>`.
278 ///
279 /// This will allocate if the value contains any escape sequences.
280 ///
281 /// See also [`unescape_value_with()`](Self::unescape_value_with)
282 ///
283 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
284 ///
285 /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
286 /// should only be used by applications.
287 /// Libs should use [`decoded_and_normalized_value()`](Self::decoded_and_normalized_value)
288 /// instead, because if lib will be used in a project which depends on quick_xml with
289 /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
290 ///
291 /// </div>
292 ///
293 /// [`encoding`]: ../../index.html#encoding
294 /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
295 #[cfg(any(doc, not(feature = "encoding")))]
296 #[deprecated = "use `Self::normalized_value()`"]
297 pub fn unescape_value(&self) -> XmlResult<Cow<'a, str>> {
298 // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
299 self.normalized_value_with(XmlVersion::Implicit1_0, 1, resolve_predefined_entity)
300 }
301
302 /// Decodes using UTF-8 then unescapes the value, using custom entities.
303 ///
304 /// This is normally the value you are interested in. Escape sequences such as `>` are
305 /// replaced with their unescaped equivalents such as `>`.
306 /// A fallback resolver for additional custom entities can be provided via
307 /// `resolve_entity`.
308 ///
309 /// This will allocate if the value contains any escape sequences.
310 ///
311 /// See also [`unescape_value()`](Self::unescape_value)
312 ///
313 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
314 ///
315 /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
316 /// should only be used by applications.
317 /// Libs should use [`decoded_and_normalized_value_with()`](Self::decoded_and_normalized_value_with)
318 /// instead, because if lib will be used in a project which depends on quick_xml with
319 /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
320 ///
321 /// </div>
322 ///
323 /// [`encoding`]: ../../index.html#encoding
324 /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
325 #[cfg(any(doc, not(feature = "encoding")))]
326 #[deprecated = "use `Self::normalized_value_with()`"]
327 #[inline]
328 pub fn unescape_value_with<'entity>(
329 &self,
330 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
331 ) -> XmlResult<Cow<'a, str>> {
332 self.normalized_value_with(XmlVersion::Implicit1_0, 128, resolve_entity)
333 }
334
335 /// Decodes then unescapes the value.
336 ///
337 /// This will allocate if the value contains any escape sequences or in
338 /// non-UTF-8 encoding.
339 #[deprecated = "use `Self::decoded_and_normalized_value()`"]
340 pub fn decode_and_unescape_value(&self, decoder: Decoder) -> XmlResult<Cow<'a, str>> {
341 // resolve_predefined_entity returns only non-recursive replacements, so depth=1 is enough
342 self.decoded_and_normalized_value_with(
343 XmlVersion::Implicit1_0,
344 decoder,
345 1,
346 resolve_predefined_entity,
347 )
348 }
349
350 /// Decodes then unescapes the value with custom entities.
351 ///
352 /// This will allocate if the value contains any escape sequences or in
353 /// non-UTF-8 encoding.
354 #[deprecated = "use `Self::decoded_and_normalized_value_with()`"]
355 pub fn decode_and_unescape_value_with<'entity>(
356 &self,
357 decoder: Decoder,
358 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
359 ) -> XmlResult<Cow<'a, str>> {
360 self.decoded_and_normalized_value_with(
361 XmlVersion::Implicit1_0,
362 decoder,
363 128,
364 resolve_entity,
365 )
366 }
367
368 /// If attribute value [represents] valid boolean values, returns `Some`, otherwise returns `None`.
369 ///
370 /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`.
371 ///
372 /// # Examples
373 ///
374 /// ```
375 /// # use pretty_assertions::assert_eq;
376 /// use quick_xml::events::attributes::Attribute;
377 ///
378 /// let attr = Attribute::from(("attr", "false"));
379 /// assert_eq!(attr.as_bool(), Some(false));
380 ///
381 /// let attr = Attribute::from(("attr", "0"));
382 /// assert_eq!(attr.as_bool(), Some(false));
383 ///
384 /// let attr = Attribute::from(("attr", "true"));
385 /// assert_eq!(attr.as_bool(), Some(true));
386 ///
387 /// let attr = Attribute::from(("attr", "1"));
388 /// assert_eq!(attr.as_bool(), Some(true));
389 ///
390 /// let attr = Attribute::from(("attr", "not bool"));
391 /// assert_eq!(attr.as_bool(), None);
392 /// ```
393 ///
394 /// [represents]: https://www.w3.org/TR/xmlschema11-2/#boolean
395 #[inline]
396 pub fn as_bool(&self) -> Option<bool> {
397 match self.value.as_ref() {
398 b"1" | b"true" => Some(true),
399 b"0" | b"false" => Some(false),
400 _ => None,
401 }
402 }
403}
404
405impl<'a> Debug for Attribute<'a> {
406 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
407 f.debug_struct("Attribute")
408 .field("key", &Bytes(self.key.as_ref()))
409 .field("value", &Bytes(&self.value))
410 .finish()
411 }
412}
413
414impl<'a> From<(&'a [u8], &'a [u8])> for Attribute<'a> {
415 /// Creates new attribute from raw bytes.
416 /// Does not apply any transformation to both key and value.
417 ///
418 /// # Examples
419 ///
420 /// ```
421 /// # use pretty_assertions::assert_eq;
422 /// use quick_xml::events::attributes::Attribute;
423 ///
424 /// let features = Attribute::from(("features".as_bytes(), "Bells & whistles".as_bytes()));
425 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
426 /// ```
427 fn from(val: (&'a [u8], &'a [u8])) -> Attribute<'a> {
428 Attribute {
429 key: QName(val.0),
430 value: Cow::from(val.1),
431 }
432 }
433}
434
435impl<'a> From<(&'a str, &'a str)> for Attribute<'a> {
436 /// Creates new attribute from text representation.
437 /// Key is stored as-is, but the value will be escaped.
438 ///
439 /// # Examples
440 ///
441 /// ```
442 /// # use pretty_assertions::assert_eq;
443 /// use quick_xml::events::attributes::Attribute;
444 ///
445 /// let features = Attribute::from(("features", "Bells & whistles"));
446 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
447 /// ```
448 fn from(val: (&'a str, &'a str)) -> Attribute<'a> {
449 Attribute {
450 key: QName(val.0.as_bytes()),
451 value: match escape(val.1) {
452 Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
453 Cow::Owned(s) => Cow::Owned(s.into_bytes()),
454 },
455 }
456 }
457}
458
459impl<'a> From<(&'a str, Cow<'a, str>)> for Attribute<'a> {
460 /// Creates new attribute from text representation.
461 /// Key is stored as-is, but the value will be escaped.
462 ///
463 /// # Examples
464 ///
465 /// ```
466 /// # use std::borrow::Cow;
467 /// use pretty_assertions::assert_eq;
468 /// use quick_xml::events::attributes::Attribute;
469 ///
470 /// let features = Attribute::from(("features", Cow::Borrowed("Bells & whistles")));
471 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
472 /// ```
473 fn from(val: (&'a str, Cow<'a, str>)) -> Attribute<'a> {
474 Attribute {
475 key: QName(val.0.as_bytes()),
476 value: match escape(val.1) {
477 Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
478 Cow::Owned(s) => Cow::Owned(s.into_bytes()),
479 },
480 }
481 }
482}
483
484impl<'a> From<Attr<&'a [u8]>> for Attribute<'a> {
485 #[inline]
486 fn from(attr: Attr<&'a [u8]>) -> Self {
487 Self {
488 key: attr.key(),
489 value: Cow::Borrowed(attr.value()),
490 }
491 }
492}
493
494////////////////////////////////////////////////////////////////////////////////////////////////////
495
/// Iterator over XML attributes.
///
/// Yields `Result<Attribute>`. An `Err` will be yielded if an attribute is malformed or duplicated.
/// The duplicate check can be turned off by calling [`with_checks(false)`].
///
/// When [`serialize`] feature is enabled, can be converted to serde's deserializer.
///
/// [`with_checks(false)`]: Self::with_checks
/// [`serialize`]: ../../index.html#serialize
#[derive(Clone)]
pub struct Attributes<'a> {
    /// Slice of `BytesStart` corresponding to attributes
    bytes: &'a [u8],
    /// Iterator state, independent from the actual source of bytes
    state: IterState,
    /// Encoding used for `bytes`; returned by [`Attributes::decoder`]
    decoder: Decoder,
}
514
515impl<'a> Attributes<'a> {
516 /// Internal constructor, used by `BytesStart`. Supplies data in reader's encoding
517 #[inline]
518 pub(crate) const fn wrap(buf: &'a [u8], pos: usize, html: bool, decoder: Decoder) -> Self {
519 Self {
520 bytes: buf,
521 state: IterState::new(pos, html),
522 decoder,
523 }
524 }
525
526 /// Creates a new attribute iterator from a buffer, which recognizes only XML-style
527 /// attributes, i. e. those which in the form `name = "value"` or `name = 'value'`.
528 /// HTML style attributes (i. e. without quotes or only name) will return a error.
529 ///
530 /// # Parameters
531 /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
532 /// string between `<` and `>` (or `/>`) of a tag;
533 /// - `pos`: a position in the `buf` where tag name is finished and attributes
534 /// is started. It is not necessary to point exactly to the end of a tag name,
535 /// although that is usually that. If it will be more than the `buf` length,
536 /// then the iterator will return `None`` immediately.
537 ///
538 /// # Example
539 /// ```
540 /// # use quick_xml::events::attributes::{Attribute, Attributes};
541 /// # use pretty_assertions::assert_eq;
542 /// #
543 /// let mut iter = Attributes::new("tag-name attr1 = 'value1' attr2='value2' ", 9);
544 /// // ^0 ^9
545 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
546 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "value2")))));
547 /// assert_eq!(iter.next(), None);
548 /// ```
549 pub const fn new(buf: &'a str, pos: usize) -> Self {
550 Self::wrap(buf.as_bytes(), pos, false, Decoder::utf8())
551 }
552
553 /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax.
554 ///
555 /// # Parameters
556 /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
557 /// string between `<` and `>` (or `/>`) of a tag;
558 /// - `pos`: a position in the `buf` where tag name is finished and attributes
559 /// is started. It is not necessary to point exactly to the end of a tag name,
560 /// although that is usually that. If it will be more than the `buf` length,
561 /// then the iterator will return `None`` immediately.
562 ///
563 /// # Example
564 /// ```
565 /// # use quick_xml::events::attributes::{Attribute, Attributes};
566 /// # use pretty_assertions::assert_eq;
567 /// #
568 /// let mut iter = Attributes::html("tag-name attr1 = value1 attr2 ", 9);
569 /// // ^0 ^9
570 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
571 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "")))));
572 /// assert_eq!(iter.next(), None);
573 /// ```
574 pub const fn html(buf: &'a str, pos: usize) -> Self {
575 Self::wrap(buf.as_bytes(), pos, true, Decoder::utf8())
576 }
577
578 /// Changes whether attributes should be checked for uniqueness.
579 ///
580 /// The XML specification requires attribute keys in the same element to be unique. This check
581 /// can be disabled to improve performance slightly.
582 ///
583 /// (`true` by default)
584 pub fn with_checks(&mut self, val: bool) -> &mut Attributes<'a> {
585 self.state.check_duplicates = val;
586 self
587 }
588
589 /// Checks if the current tag has a [`xsi:nil`] attribute. This method ignores any errors in
590 /// attributes.
591 ///
592 /// # Examples
593 ///
594 /// ```
595 /// # use pretty_assertions::assert_eq;
596 /// use quick_xml::events::Event;
597 /// use quick_xml::name::QName;
598 /// use quick_xml::reader::NsReader;
599 ///
600 /// let mut reader = NsReader::from_str("
601 /// <root xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>
602 /// <true xsi:nil='true'/>
603 /// <false xsi:nil='false'/>
604 /// <none/>
605 /// <non-xsi xsi:nil='true' xmlns:xsi='namespace'/>
606 /// <unbound-nil nil='true' xmlns='http://www.w3.org/2001/XMLSchema-instance'/>
607 /// <another-xmlns f:nil='true' xmlns:f='http://www.w3.org/2001/XMLSchema-instance'/>
608 /// </root>
609 /// ");
610 /// reader.config_mut().trim_text(true);
611 ///
612 /// macro_rules! check {
613 /// ($reader:expr, $name:literal, $value:literal) => {
614 /// let event = match $reader.read_event().unwrap() {
615 /// Event::Empty(e) => e,
616 /// e => panic!("Unexpected event {:?}", e),
617 /// };
618 /// assert_eq!(
619 /// (event.name(), event.attributes().has_nil($reader.resolver())),
620 /// (QName($name.as_bytes()), $value),
621 /// );
622 /// };
623 /// }
624 ///
625 /// let root = match reader.read_event().unwrap() {
626 /// Event::Start(e) => e,
627 /// e => panic!("Unexpected event {:?}", e),
628 /// };
629 /// assert_eq!(root.attributes().has_nil(reader.resolver()), false);
630 ///
631 /// // definitely true
632 /// check!(reader, "true", true);
633 /// // definitely false
634 /// check!(reader, "false", false);
635 /// // absence of the attribute means that attribute is not set
636 /// check!(reader, "none", false);
637 /// // attribute not bound to the correct namespace
638 /// check!(reader, "non-xsi", false);
639 /// // attributes without prefix not bound to any namespace
640 /// check!(reader, "unbound-nil", false);
641 /// // prefix can be any while it is bound to the correct namespace
642 /// check!(reader, "another-xmlns", true);
643 /// ```
644 ///
645 /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil
646 pub fn has_nil(&mut self, resolver: &NamespaceResolver) -> bool {
647 use crate::name::ResolveResult::*;
648
649 self.any(|attr| {
650 if let Ok(attr) = attr {
651 match resolver.resolve_attribute(attr.key) {
652 (
653 Bound(Namespace(b"http://www.w3.org/2001/XMLSchema-instance")),
654 LocalName(b"nil"),
655 ) => attr.as_bool().unwrap_or_default(),
656 _ => false,
657 }
658 } else {
659 false
660 }
661 })
662 }
663
664 /// Get the decoder, used to decode bytes, read by the reader which produces
665 /// this iterator, to the strings.
666 ///
667 /// When iterator was created manually or get from a manually created [`BytesStart`],
668 /// encoding is UTF-8.
669 ///
670 /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
671 /// defaults to UTF-8.
672 ///
673 /// [`BytesStart`]: crate::events::BytesStart
674 /// [`encoding`]: ../index.html#encoding
675 #[inline]
676 pub const fn decoder(&self) -> Decoder {
677 self.decoder
678 }
679}
680
681impl<'a> Debug for Attributes<'a> {
682 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
683 f.debug_struct("Attributes")
684 .field("bytes", &Bytes(self.bytes))
685 .field("state", &self.state)
686 .field("decoder", &self.decoder)
687 .finish()
688 }
689}
690
691impl<'a> Iterator for Attributes<'a> {
692 type Item = Result<Attribute<'a>, AttrError>;
693
694 #[inline]
695 fn next(&mut self) -> Option<Self::Item> {
696 match self.state.next(self.bytes) {
697 None => None,
698 Some(Ok(a)) => Some(Ok(a.map(|range| &self.bytes[range]).into())),
699 Some(Err(e)) => Some(Err(e)),
700 }
701 }
702}
703
// Marker impl only: `FusedIterator` adds no methods of its own.
// NOTE(review): the trait promises that `next()` keeps returning `None` once it has
// returned `None`; presumably this is upheld by `IterState` — confirm there.
impl<'a> FusedIterator for Attributes<'a> {}
705
706////////////////////////////////////////////////////////////////////////////////////////////////////
707
/// Errors that can be raised during parsing attributes.
///
/// Recovery position in examples shows the position from which parsing of the
/// next attribute will be attempted.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AttrError {
    /// Attribute key was not followed by `=`, position relative to the start of
    /// the owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key another="attribute"/>
    /// <!--     ^~~ error position, recovery position (8) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    ExpectedEq(usize),
    /// Attribute value was not found after `=`, position relative to the start
    /// of the owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = />
    /// <!--       ^~~ error position, recovery position (10) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because otherwise any content after `=` will be treated as a value.
    /// The XML
    ///
    /// ```xml
    /// <tag key = another-key = "value"/>
    /// <!--                   ^ ^- recovery position (24) -->
    /// <!--                   '~~ error position (22) -->
    /// ```
    ///
    /// will be treated as `Attribute { key = b"key", value = b"another-key" }`
    /// and either an [`Attribute`] is returned, or [`AttrError::UnquotedValue`] is raised,
    /// depending on the parsing mode.
    ExpectedValue(usize),
    /// Attribute value is not quoted, position relative to the start of the
    /// owning tag is provided.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = value />
    /// <!--       ^    ^~~ recovery position (15) -->
    /// <!--       '~~ error position (10) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    UnquotedValue(usize),
    /// Attribute value was not finished with a matching quote, position relative
    /// to the start of owning tag and a quote is provided. That position is always
    /// a last character in the tag content.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = "value  />
    /// <tag key = 'value  />
    /// <!--               ^~~ error position, recovery position (18) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because all input was consumed during scanning for a quote.
    ExpectedQuote(usize, u8),
    /// An attribute with the same name was already encountered. Two parameters
    /// define (1) the error position relative to the start of the owning tag
    /// for a new attribute and (2) the start position of a previously encountered
    /// attribute with the same name.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = 'value'  key="value2" attr3='value3' />
    /// <!-- ^              ^            ^~~ recovery position (32) -->
    /// <!-- |              '~~ error position (19) -->
    /// <!-- '~~ previous position (4) -->
    /// ```
    ///
    /// This error is returned only when [`Attributes::with_checks()`] is set
    /// to `true` (that is default behavior).
    Duplicated(usize, usize),
}

impl Display for AttrError {
    /// Writes a human-readable message that starts with the error position.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match *self {
            Self::ExpectedEq(at) => {
                write!(
                    f,
                    r#"position {}: attribute key must be directly followed by `=` or space"#,
                    at
                )
            }
            Self::ExpectedValue(at) => {
                write!(
                    f,
                    r#"position {}: `=` must be followed by an attribute value"#,
                    at
                )
            }
            Self::UnquotedValue(at) => {
                write!(
                    f,
                    r#"position {}: attribute value must be enclosed in `"` or `'`"#,
                    at
                )
            }
            Self::ExpectedQuote(at, quote) => {
                // The expected quote is stored as a byte; show it as a character
                write!(
                    f,
                    r#"position {}: missing closing quote `{}` in attribute value"#,
                    at, quote as char
                )
            }
            Self::Duplicated(at, previous) => {
                write!(
                    f,
                    r#"position {}: duplicated attribute, previous declaration at position {}"#,
                    at, previous
                )
            }
        }
    }
}

impl std::error::Error for AttrError {}
830
831////////////////////////////////////////////////////////////////////////////////////////////////////
832
/// A struct representing a key/value XML or HTML [attribute].
///
/// [attribute]: https://www.w3.org/TR/xml11/#NT-Attribute
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Attr<T> {
    /// Attribute with value enclosed in double quotes (`"`). Attribute key and
    /// value provided. This is a canonical XML-style attribute.
    DoubleQ(T, T),
    /// Attribute with value enclosed in single quotes (`'`). Attribute key and
    /// value provided. This is an XML-style attribute.
    SingleQ(T, T),
    /// Attribute with value not enclosed in quotes. Attribute key and value
    /// provided. This is an HTML-style attribute, it can be returned in HTML-mode
    /// parsing only. In XML mode [`AttrError::UnquotedValue`] will be raised
    /// instead.
    ///
    /// Attribute value can be invalid according to the [HTML specification],
    /// in particular, it can contain `"`, `'`, `=`, `<`, and <code>`</code>
    /// characters. The absence of the `>` character is nevertheless guaranteed,
    /// since the parser extracts [events] based on them even before the start
    /// of parsing attributes.
    ///
    /// [HTML specification]: https://html.spec.whatwg.org/#unquoted
    /// [events]: crate::events::Event::Start
    Unquoted(T, T),
    /// Attribute without value. Attribute key provided. This is an HTML-style attribute,
    /// it can be returned in HTML-mode parsing only. In XML mode
    /// [`AttrError::ExpectedEq`] will be raised instead.
    Empty(T),
}

impl<T> Attr<T> {
    /// Maps an `Attr<T>` to `Attr<U>` by applying a function to a contained key and value.
    ///
    /// The variant is preserved. For variants that carry a value, `f` is called
    /// for the key first and then for the value.
    #[inline]
    pub fn map<U, F>(self, mut f: F) -> Attr<U>
    where
        F: FnMut(T) -> U,
    {
        match self {
            Attr::Empty(key) => Attr::Empty(f(key)),
            Attr::Unquoted(key, value) => {
                let key = f(key);
                let value = f(value);
                Attr::Unquoted(key, value)
            }
            Attr::SingleQ(key, value) => {
                let key = f(key);
                let value = f(value);
                Attr::SingleQ(key, value)
            }
            Attr::DoubleQ(key, value) => {
                let key = f(key);
                let value = f(value);
                Attr::DoubleQ(key, value)
            }
        }
    }
}
879
880impl<'a> Attr<&'a [u8]> {
881 /// Returns the key value
882 #[inline]
883 pub const fn key(&self) -> QName<'a> {
884 QName(match self {
885 Attr::DoubleQ(key, _) => key,
886 Attr::SingleQ(key, _) => key,
887 Attr::Empty(key) => key,
888 Attr::Unquoted(key, _) => key,
889 })
890 }
891 /// Returns the attribute value. For [`Self::Empty`] variant an empty slice
892 /// is returned according to the [HTML specification].
893 ///
894 /// [HTML specification]: https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#syntax-attr-empty
895 #[inline]
896 pub const fn value(&self) -> &'a [u8] {
897 match self {
898 Attr::DoubleQ(_, value) => value,
899 Attr::SingleQ(_, value) => value,
900 Attr::Empty(_) => &[],
901 Attr::Unquoted(_, value) => value,
902 }
903 }
904}
905
906impl<T: AsRef<[u8]>> Debug for Attr<T> {
907 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
908 match self {
909 Attr::DoubleQ(key, value) => f
910 .debug_tuple("Attr::DoubleQ")
911 .field(&Bytes(key.as_ref()))
912 .field(&Bytes(value.as_ref()))
913 .finish(),
914 Attr::SingleQ(key, value) => f
915 .debug_tuple("Attr::SingleQ")
916 .field(&Bytes(key.as_ref()))
917 .field(&Bytes(value.as_ref()))
918 .finish(),
919 Attr::Empty(key) => f
920 .debug_tuple("Attr::Empty")
921 // Comment to prevent formatting and keep style consistent
922 .field(&Bytes(key.as_ref()))
923 .finish(),
924 Attr::Unquoted(key, value) => f
925 .debug_tuple("Attr::Unquoted")
926 .field(&Bytes(key.as_ref()))
927 .field(&Bytes(value.as_ref()))
928 .finish(),
929 }
930 }
931}
932
933/// Unpacks attribute key and value into tuple of this two elements.
934/// `None` value element is returned only for [`Attr::Empty`] variant.
935impl<T> From<Attr<T>> for (T, Option<T>) {
936 #[inline]
937 fn from(attr: Attr<T>) -> Self {
938 match attr {
939 Attr::DoubleQ(key, value) => (key, Some(value)),
940 Attr::SingleQ(key, value) => (key, Some(value)),
941 Attr::Empty(key) => (key, None),
942 Attr::Unquoted(key, value) => (key, Some(value)),
943 }
944 }
945}
946
947////////////////////////////////////////////////////////////////////////////////////////////////////
948
/// Result of parsing one attribute by [`IterState::next`]: on success the key
/// and the value are represented by their ranges in the parsed slice.
type AttrResult = Result<Attr<Range<usize>>, AttrError>;
950
/// What [`IterState`] has to do before it can search for the next attribute.
/// All offsets are indexes in the slice passed to [`IterState::next`].
#[derive(Clone, Copy, Debug)]
enum State {
    /// Iteration finished, iterator will return `None` to all [`IterState::next`]
    /// requests.
    Done,
    /// The last attribute returned was deserialized successfully. Contains an
    /// offset from which the next attribute should be searched.
    Next(usize),
    /// The last call returned [`AttrError::UnquotedValue`], the offset points
    /// to the beginning of the value. Recovery should skip the value.
    SkipValue(usize),
    /// The last call returned [`AttrError::Duplicated`], the offset points to
    /// the equal (`=`) sign. Recovery should skip it and the value.
    SkipEqValue(usize),
}
966
/// External iterator over spans of attribute key and value.
///
/// The state does not hold the data it iterates over: the slice is passed to
/// [`IterState::next`] on every call and the results refer to it by ranges.
#[derive(Clone, Debug)]
pub(crate) struct IterState {
    /// Iteration state that determines what actions should be done before the
    /// actual parsing of the next attribute
    state: State,
    /// If `true`, enables the ability to parse unquoted values and key-only
    /// (empty) attributes
    html: bool,
    /// If `true`, checks for duplicate names
    check_duplicates: bool,
    /// If `check_duplicates` is set, contains the ranges of already parsed
    /// attribute names. Ranges are stored instead of slices to be able to
    /// report the position of a previous attribute
    keys: Vec<Range<usize>>,
}
983
984impl IterState {
985 pub const fn new(offset: usize, html: bool) -> Self {
986 Self {
987 state: State::Next(offset),
988 html,
989 check_duplicates: true,
990 keys: Vec::new(),
991 }
992 }
993
994 /// Recover from an error that could have been made on a previous step.
995 /// Returns an offset from which parsing should continue.
996 /// If there no input left, returns `None`.
997 fn recover(&self, slice: &[u8]) -> Option<usize> {
998 match self.state {
999 State::Done => None,
1000 State::Next(offset) => Some(offset),
1001 State::SkipValue(offset) => self.skip_value(slice, offset),
1002 State::SkipEqValue(offset) => self.skip_eq_value(slice, offset),
1003 }
1004 }
1005
1006 /// Skip all characters up to first space symbol or end-of-input
1007 #[inline]
1008 #[allow(clippy::manual_map)]
1009 fn skip_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
1010 let mut iter = (offset..).zip(slice[offset..].iter());
1011
1012 match iter.find(|(_, &b)| is_whitespace(b)) {
1013 // Input: ` key = value `
1014 // | ^
1015 // offset e
1016 Some((e, _)) => Some(e),
1017 // Input: ` key = value`
1018 // | ^
1019 // offset e = len()
1020 None => None,
1021 }
1022 }
1023
1024 /// Skip all characters up to first space symbol or end-of-input
1025 #[inline]
1026 fn skip_eq_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
1027 let mut iter = (offset..).zip(slice[offset..].iter());
1028
1029 // Skip all up to the quote and get the quote type
1030 let quote = match iter.find(|(_, &b)| !is_whitespace(b)) {
1031 // Input: ` key = "`
1032 // | ^
1033 // offset
1034 Some((_, b'"')) => b'"',
1035 // Input: ` key = '`
1036 // | ^
1037 // offset
1038 Some((_, b'\'')) => b'\'',
1039
1040 // Input: ` key = x`
1041 // | ^
1042 // offset
1043 Some((offset, _)) => return self.skip_value(slice, offset),
1044 // Input: ` key = `
1045 // | ^
1046 // offset
1047 None => return None,
1048 };
1049
1050 match iter.find(|(_, &b)| b == quote) {
1051 // Input: ` key = " "`
1052 // ^
1053 Some((e, b'"')) => Some(e),
1054 // Input: ` key = ' '`
1055 // ^
1056 Some((e, _)) => Some(e),
1057
1058 // Input: ` key = " `
1059 // Input: ` key = ' `
1060 // ^
1061 // Closing quote not found
1062 None => None,
1063 }
1064 }
1065
1066 #[inline]
1067 fn check_for_duplicates(
1068 &mut self,
1069 slice: &[u8],
1070 key: Range<usize>,
1071 ) -> Result<Range<usize>, AttrError> {
1072 if self.check_duplicates {
1073 if let Some(prev) = self
1074 .keys
1075 .iter()
1076 .find(|r| slice[(*r).clone()] == slice[key.clone()])
1077 {
1078 return Err(AttrError::Duplicated(key.start, prev.start));
1079 }
1080 self.keys.push(key.clone());
1081 }
1082 Ok(key)
1083 }
1084
1085 /// # Parameters
1086 ///
1087 /// - `slice`: content of the tag, used for checking for duplicates
1088 /// - `key`: Range of key in slice, if iterator in HTML mode
1089 /// - `offset`: Position of error if iterator in XML mode
1090 #[inline]
1091 fn key_only(&mut self, slice: &[u8], key: Range<usize>, offset: usize) -> Option<AttrResult> {
1092 Some(if self.html {
1093 self.check_for_duplicates(slice, key).map(Attr::Empty)
1094 } else {
1095 Err(AttrError::ExpectedEq(offset))
1096 })
1097 }
1098
1099 #[inline]
1100 fn double_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
1101 self.state = State::Next(value.end + 1); // +1 for `"`
1102
1103 Some(Ok(Attr::DoubleQ(key, value)))
1104 }
1105
1106 #[inline]
1107 fn single_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
1108 self.state = State::Next(value.end + 1); // +1 for `'`
1109
1110 Some(Ok(Attr::SingleQ(key, value)))
1111 }
1112
1113 pub fn next(&mut self, slice: &[u8]) -> Option<AttrResult> {
1114 let mut iter = match self.recover(slice) {
1115 Some(offset) => (offset..).zip(slice[offset..].iter()),
1116 None => return None,
1117 };
1118
1119 // Index where next key started
1120 let start_key = match iter.find(|(_, &b)| !is_whitespace(b)) {
1121 // Input: ` key`
1122 // ^
1123 Some((s, _)) => s,
1124 // Input: ` `
1125 // ^
1126 None => {
1127 // Because we reach end-of-input, stop iteration on next call
1128 self.state = State::Done;
1129 return None;
1130 }
1131 };
1132 // Span of a key
1133 let (key, offset) = match iter.find(|(_, &b)| b == b'=' || is_whitespace(b)) {
1134 // Input: ` key=`
1135 // | ^
1136 // s e
1137 Some((e, b'=')) => (start_key..e, e),
1138
1139 // Input: ` key `
1140 // ^
1141 Some((e, _)) => match iter.find(|(_, &b)| !is_whitespace(b)) {
1142 // Input: ` key =`
1143 // | | ^
1144 // start_key e
1145 Some((offset, b'=')) => (start_key..e, offset),
1146 // Input: ` key x`
1147 // | | ^
1148 // start_key e
1149 // If HTML-like attributes is allowed, this is the result, otherwise error
1150 Some((offset, _)) => {
1151 // In any case, recovering is not required
1152 self.state = State::Next(offset);
1153 return self.key_only(slice, start_key..e, offset);
1154 }
1155 // Input: ` key `
1156 // | | ^
1157 // start_key e
1158 // If HTML-like attributes is allowed, this is the result, otherwise error
1159 None => {
1160 // Because we reach end-of-input, stop iteration on next call
1161 self.state = State::Done;
1162 return self.key_only(slice, start_key..e, slice.len());
1163 }
1164 },
1165
1166 // Input: ` key`
1167 // | ^
1168 // s e = len()
1169 // If HTML-like attributes is allowed, this is the result, otherwise error
1170 None => {
1171 // Because we reach end-of-input, stop iteration on next call
1172 self.state = State::Done;
1173 let e = slice.len();
1174 return self.key_only(slice, start_key..e, e);
1175 }
1176 };
1177
1178 let key = match self.check_for_duplicates(slice, key) {
1179 Err(e) => {
1180 self.state = State::SkipEqValue(offset);
1181 return Some(Err(e));
1182 }
1183 Ok(key) => key,
1184 };
1185
1186 ////////////////////////////////////////////////////////////////////////
1187
1188 // Gets the position of quote and quote type
1189 let (start_value, quote) = match iter.find(|(_, &b)| !is_whitespace(b)) {
1190 // Input: ` key = "`
1191 // ^
1192 Some((s, b'"')) => (s + 1, b'"'),
1193 // Input: ` key = '`
1194 // ^
1195 Some((s, b'\'')) => (s + 1, b'\''),
1196
1197 // Input: ` key = x`
1198 // ^
1199 // If HTML-like attributes is allowed, this is the start of the value
1200 Some((s, _)) if self.html => {
1201 // We do not check validity of attribute value characters as required
1202 // according to https://html.spec.whatwg.org/#unquoted. It can be done
1203 // during validation phase
1204 let end = match iter.find(|(_, &b)| is_whitespace(b)) {
1205 // Input: ` key = value `
1206 // | ^
1207 // s e
1208 Some((e, _)) => e,
1209 // Input: ` key = value`
1210 // | ^
1211 // s e = len()
1212 None => slice.len(),
1213 };
1214 self.state = State::Next(end);
1215 return Some(Ok(Attr::Unquoted(key, s..end)));
1216 }
1217 // Input: ` key = x`
1218 // ^
1219 Some((s, _)) => {
1220 self.state = State::SkipValue(s);
1221 return Some(Err(AttrError::UnquotedValue(s)));
1222 }
1223
1224 // Input: ` key = `
1225 // ^
1226 None => {
1227 // Because we reach end-of-input, stop iteration on next call
1228 self.state = State::Done;
1229 return Some(Err(AttrError::ExpectedValue(slice.len())));
1230 }
1231 };
1232
1233 match iter.find(|(_, &b)| b == quote) {
1234 // Input: ` key = " "`
1235 // ^
1236 Some((e, b'"')) => self.double_q(key, start_value..e),
1237 // Input: ` key = ' '`
1238 // ^
1239 Some((e, _)) => self.single_q(key, start_value..e),
1240
1241 // Input: ` key = " `
1242 // Input: ` key = ' `
1243 // ^
1244 // Closing quote not found
1245 None => {
1246 // Because we reach end-of-input, stop iteration on next call
1247 self.state = State::Done;
1248 Some(Err(AttrError::ExpectedQuote(slice.len(), quote)))
1249 }
1250 }
1251 }
1252}
1253
1254////////////////////////////////////////////////////////////////////////////////////////////////////
1255
/// Checks how parsing of XML-style attributes works. Each attribute must
/// have a value enclosed in single or double quotes.
1258#[cfg(test)]
1259mod xml {
1260 use super::*;
1261 use pretty_assertions::assert_eq;
1262
1263 mod attribute_value_normalization {
1264 use super::*;
1265 use crate::errors::Error;
1266 use crate::escape::EscapeError::*;
1267 use crate::XmlVersion::*;
1268 use pretty_assertions::assert_eq;
1269
1270 /// Empty values returned are unchanged
1271 #[test]
1272 fn empty() {
1273 let raw_value = "".as_bytes();
1274 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1275
1276 let value = attr
1277 .decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1278 .unwrap();
1279 assert_eq!(value, "");
1280 // assert_eq! does not check if value is borrowed, but this is important
1281 assert!(matches!(value, Cow::Borrowed(_)));
1282
1283 let value = attr
1284 .decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1285 .unwrap();
1286 assert_eq!(value, "");
1287 // assert_eq! does not check if value is borrowed, but this is important
1288 assert!(matches!(value, Cow::Borrowed(_)));
1289
1290 let value = attr
1291 .decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1292 .unwrap();
1293 assert_eq!(value, "");
1294 // assert_eq! does not check if value is borrowed, but this is important
1295 assert!(matches!(value, Cow::Borrowed(_)));
1296 }
1297
1298 /// Already normalized values are returned unchanged
1299 #[test]
1300 fn already_normalized() {
1301 let raw_value = "foobar123".as_bytes();
1302 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1303
1304 let value = attr
1305 .decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1306 .unwrap();
1307 assert_eq!(value, "foobar123");
1308 // assert_eq! does not check if value is borrowed, but this is important
1309 assert!(matches!(value, Cow::Borrowed(_)));
1310
1311 let value = attr
1312 .decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1313 .unwrap();
1314 assert_eq!(value, "foobar123");
1315 // assert_eq! does not check if value is borrowed, but this is important
1316 assert!(matches!(value, Cow::Borrowed(_)));
1317
1318 let value = attr
1319 .decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1320 .unwrap();
1321 assert_eq!(value, "foobar123");
1322 // assert_eq! does not check if value is borrowed, but this is important
1323 assert!(matches!(value, Cow::Borrowed(_)));
1324 }
1325
1326 /// Return, tab, and newline characters (0xD, 0x9, 0xA) must be substituted with
1327 /// a space character, \r\n and \r\u{85} should be replaced by one space in 1.1
1328 #[test]
1329 fn space_replacement() {
1330 let raw_value = "\r\nfoo\u{85}\u{2028}\rbar\tbaz\n\ndelta\n\r\u{85}".as_bytes();
1331 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1332
1333 assert_eq!(
1334 attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1335 .unwrap(),
1336 " foo\u{85}\u{2028} bar baz delta \u{85}"
1337 );
1338 assert_eq!(
1339 attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1340 .unwrap(),
1341 " foo\u{85}\u{2028} bar baz delta \u{85}"
1342 );
1343 assert_eq!(
1344 attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1345 .unwrap(),
1346 " foo bar baz delta "
1347 );
1348 }
1349
1350 /// Entities must be terminated
1351 #[test]
1352 fn unterminated_entity() {
1353 let raw_value = "abc"def".as_bytes();
1354 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1355
1356 match attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8()) {
1357 Err(Error::Escape(err)) => assert_eq!(err, UnterminatedEntity(3..11)),
1358 x => panic!("Expected Err(Escape(_)), got {:?}", x),
1359 }
1360
1361 match attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8()) {
1362 Err(Error::Escape(err)) => assert_eq!(err, UnterminatedEntity(3..11)),
1363 x => panic!("Expected Err(Escape(_)), got {:?}", x),
1364 }
1365
1366 match attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8()) {
1367 Err(Error::Escape(err)) => assert_eq!(err, UnterminatedEntity(3..11)),
1368 x => panic!("Expected Err(Escape(_)), got {:?}", x),
1369 }
1370 }
1371
1372 /// Unknown entities raise error
1373 #[test]
1374 fn unrecognized_entity() {
1375 let raw_value = "abc&unkn;def".as_bytes();
1376 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1377
1378 match attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8()) {
1379 // TODO: is this divergence between range behavior of UnterminatedEntity
1380 // and UnrecognizedEntity appropriate? existing unescape code behaves the same. (see: start index)
1381 Err(Error::Escape(err)) => {
1382 assert_eq!(err, UnrecognizedEntity(4..8, "unkn".to_owned()))
1383 }
1384 x => panic!("Expected Err(Escape(err)), got {:?}", x),
1385 }
1386 match attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8()) {
1387 // TODO: is this divergence between range behavior of UnterminatedEntity
1388 // and UnrecognizedEntity appropriate? existing unescape code behaves the same. (see: start index)
1389 Err(Error::Escape(err)) => {
1390 assert_eq!(err, UnrecognizedEntity(4..8, "unkn".to_owned()))
1391 }
1392 x => panic!("Expected Err(Escape(err)), got {:?}", x),
1393 }
1394 match attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8()) {
1395 // TODO: is this divergence between range behavior of UnterminatedEntity
1396 // and UnrecognizedEntity appropriate? existing unescape code behaves the same. (see: start index)
1397 Err(Error::Escape(err)) => {
1398 assert_eq!(err, UnrecognizedEntity(4..8, "unkn".to_owned()))
1399 }
1400 x => panic!("Expected Err(Escape(err)), got {:?}", x),
1401 }
1402 }
1403
1404 /// custom entity replacement works, entity replacement text processed recursively
1405 #[test]
1406 fn entity_replacement() {
1407 let raw_value = "&d;&d;A&a; &a;B&da;".as_bytes();
1408 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1409 fn custom_resolver(ent: &str) -> Option<&'static str> {
1410 match ent {
1411 "d" => Some("
"),
1412 "a" => Some("
"),
1413 "da" => Some("
"),
1414 _ => None,
1415 }
1416 }
1417
1418 assert_eq!(
1419 attr.decoded_and_normalized_value_with(
1420 Implicit1_0,
1421 Decoder::utf8(),
1422 5,
1423 &custom_resolver
1424 )
1425 .unwrap(),
1426 "\r\rA\n \nB\r\n"
1427 );
1428 assert_eq!(
1429 attr.decoded_and_normalized_value_with(
1430 Explicit1_0,
1431 Decoder::utf8(),
1432 5,
1433 &custom_resolver
1434 )
1435 .unwrap(),
1436 "\r\rA\n \nB\r\n"
1437 );
1438 assert_eq!(
1439 attr.decoded_and_normalized_value_with(
1440 Explicit1_1,
1441 Decoder::utf8(),
1442 5,
1443 &custom_resolver
1444 )
1445 .unwrap(),
1446 "\r\rA\n \nB\r\n"
1447 );
1448 }
1449
1450 #[test]
1451 fn char_references() {
1452 // character literal references are substituted without being replaced by spaces
1453 let raw_value = "

A

B
".as_bytes();
1454 let attr = Attribute::from(("foo".as_bytes(), raw_value));
1455
1456 assert_eq!(
1457 attr.decoded_and_normalized_value(Implicit1_0, Decoder::utf8())
1458 .unwrap(),
1459 "\r\rA\n\nB\r\n"
1460 );
1461 assert_eq!(
1462 attr.decoded_and_normalized_value(Explicit1_0, Decoder::utf8())
1463 .unwrap(),
1464 "\r\rA\n\nB\r\n"
1465 );
1466 assert_eq!(
1467 attr.decoded_and_normalized_value(Explicit1_1, Decoder::utf8())
1468 .unwrap(),
1469 "\r\rA\n\nB\r\n"
1470 );
1471 }
1472 }
1473
1474 /// Checked attribute is the single attribute
1475 mod single {
1476 use super::*;
1477 use pretty_assertions::assert_eq;
1478
1479 /// Attribute have a value enclosed in single quotes
1480 #[test]
1481 fn single_quoted() {
1482 let mut iter = Attributes::new(r#"tag key='value'"#, 3);
1483
1484 assert_eq!(
1485 iter.next(),
1486 Some(Ok(Attribute {
1487 key: QName(b"key"),
1488 value: Cow::Borrowed(b"value"),
1489 }))
1490 );
1491 assert_eq!(iter.next(), None);
1492 assert_eq!(iter.next(), None);
1493 }
1494
1495 /// Attribute have a value enclosed in double quotes
1496 #[test]
1497 fn double_quoted() {
1498 let mut iter = Attributes::new(r#"tag key="value""#, 3);
1499
1500 assert_eq!(
1501 iter.next(),
1502 Some(Ok(Attribute {
1503 key: QName(b"key"),
1504 value: Cow::Borrowed(b"value"),
1505 }))
1506 );
1507 assert_eq!(iter.next(), None);
1508 assert_eq!(iter.next(), None);
1509 }
1510
1511 /// Attribute have a value, not enclosed in quotes
1512 #[test]
1513 fn unquoted() {
1514 let mut iter = Attributes::new(r#"tag key=value"#, 3);
1515 // 0 ^ = 8
1516
1517 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
1518 assert_eq!(iter.next(), None);
1519 assert_eq!(iter.next(), None);
1520 }
1521
1522 /// Only attribute key is present
1523 #[test]
1524 fn key_only() {
1525 let mut iter = Attributes::new(r#"tag key"#, 3);
1526 // 0 ^ = 7
1527
1528 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(7))));
1529 assert_eq!(iter.next(), None);
1530 assert_eq!(iter.next(), None);
1531 }
1532
1533 /// Key is started with an invalid symbol (a single quote in this test).
1534 /// Because we do not check validity of keys and values during parsing,
1535 /// that invalid attribute will be returned
1536 #[test]
1537 fn key_start_invalid() {
1538 let mut iter = Attributes::new(r#"tag 'key'='value'"#, 3);
1539
1540 assert_eq!(
1541 iter.next(),
1542 Some(Ok(Attribute {
1543 key: QName(b"'key'"),
1544 value: Cow::Borrowed(b"value"),
1545 }))
1546 );
1547 assert_eq!(iter.next(), None);
1548 assert_eq!(iter.next(), None);
1549 }
1550
1551 /// Key contains an invalid symbol (an ampersand in this test).
1552 /// Because we do not check validity of keys and values during parsing,
1553 /// that invalid attribute will be returned
1554 #[test]
1555 fn key_contains_invalid() {
1556 let mut iter = Attributes::new(r#"tag key&jey='value'"#, 3);
1557
1558 assert_eq!(
1559 iter.next(),
1560 Some(Ok(Attribute {
1561 key: QName(b"key&jey"),
1562 value: Cow::Borrowed(b"value"),
1563 }))
1564 );
1565 assert_eq!(iter.next(), None);
1566 assert_eq!(iter.next(), None);
1567 }
1568
1569 /// Attribute value is missing after `=`
1570 #[test]
1571 fn missed_value() {
1572 let mut iter = Attributes::new(r#"tag key="#, 3);
1573 // 0 ^ = 8
1574
1575 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8))));
1576 assert_eq!(iter.next(), None);
1577 assert_eq!(iter.next(), None);
1578 }
1579 }
1580
1581 /// Checked attribute is the first attribute in the list of many attributes
1582 mod first {
1583 use super::*;
1584 use pretty_assertions::assert_eq;
1585
1586 /// Attribute have a value enclosed in single quotes
1587 #[test]
1588 fn single_quoted() {
1589 let mut iter = Attributes::new(r#"tag key='value' regular='attribute'"#, 3);
1590
1591 assert_eq!(
1592 iter.next(),
1593 Some(Ok(Attribute {
1594 key: QName(b"key"),
1595 value: Cow::Borrowed(b"value"),
1596 }))
1597 );
1598 assert_eq!(
1599 iter.next(),
1600 Some(Ok(Attribute {
1601 key: QName(b"regular"),
1602 value: Cow::Borrowed(b"attribute"),
1603 }))
1604 );
1605 assert_eq!(iter.next(), None);
1606 assert_eq!(iter.next(), None);
1607 }
1608
1609 /// Attribute have a value enclosed in double quotes
1610 #[test]
1611 fn double_quoted() {
1612 let mut iter = Attributes::new(r#"tag key="value" regular='attribute'"#, 3);
1613
1614 assert_eq!(
1615 iter.next(),
1616 Some(Ok(Attribute {
1617 key: QName(b"key"),
1618 value: Cow::Borrowed(b"value"),
1619 }))
1620 );
1621 assert_eq!(
1622 iter.next(),
1623 Some(Ok(Attribute {
1624 key: QName(b"regular"),
1625 value: Cow::Borrowed(b"attribute"),
1626 }))
1627 );
1628 assert_eq!(iter.next(), None);
1629 assert_eq!(iter.next(), None);
1630 }
1631
1632 /// Attribute have a value, not enclosed in quotes
1633 #[test]
1634 fn unquoted() {
1635 let mut iter = Attributes::new(r#"tag key=value regular='attribute'"#, 3);
1636 // 0 ^ = 8
1637
1638 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
1639 // check error recovery
1640 assert_eq!(
1641 iter.next(),
1642 Some(Ok(Attribute {
1643 key: QName(b"regular"),
1644 value: Cow::Borrowed(b"attribute"),
1645 }))
1646 );
1647 assert_eq!(iter.next(), None);
1648 assert_eq!(iter.next(), None);
1649 }
1650
1651 /// Only attribute key is present
1652 #[test]
1653 fn key_only() {
1654 let mut iter = Attributes::new(r#"tag key regular='attribute'"#, 3);
1655 // 0 ^ = 8
1656
1657 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
1658 // check error recovery
1659 assert_eq!(
1660 iter.next(),
1661 Some(Ok(Attribute {
1662 key: QName(b"regular"),
1663 value: Cow::Borrowed(b"attribute"),
1664 }))
1665 );
1666 assert_eq!(iter.next(), None);
1667 assert_eq!(iter.next(), None);
1668 }
1669
1670 /// Key is started with an invalid symbol (a single quote in this test).
1671 /// Because we do not check validity of keys and values during parsing,
1672 /// that invalid attribute will be returned
1673 #[test]
1674 fn key_start_invalid() {
1675 let mut iter = Attributes::new(r#"tag 'key'='value' regular='attribute'"#, 3);
1676
1677 assert_eq!(
1678 iter.next(),
1679 Some(Ok(Attribute {
1680 key: QName(b"'key'"),
1681 value: Cow::Borrowed(b"value"),
1682 }))
1683 );
1684 assert_eq!(
1685 iter.next(),
1686 Some(Ok(Attribute {
1687 key: QName(b"regular"),
1688 value: Cow::Borrowed(b"attribute"),
1689 }))
1690 );
1691 assert_eq!(iter.next(), None);
1692 assert_eq!(iter.next(), None);
1693 }
1694
1695 /// Key contains an invalid symbol (an ampersand in this test).
1696 /// Because we do not check validity of keys and values during parsing,
1697 /// that invalid attribute will be returned
1698 #[test]
1699 fn key_contains_invalid() {
1700 let mut iter = Attributes::new(r#"tag key&jey='value' regular='attribute'"#, 3);
1701
1702 assert_eq!(
1703 iter.next(),
1704 Some(Ok(Attribute {
1705 key: QName(b"key&jey"),
1706 value: Cow::Borrowed(b"value"),
1707 }))
1708 );
1709 assert_eq!(
1710 iter.next(),
1711 Some(Ok(Attribute {
1712 key: QName(b"regular"),
1713 value: Cow::Borrowed(b"attribute"),
1714 }))
1715 );
1716 assert_eq!(iter.next(), None);
1717 assert_eq!(iter.next(), None);
1718 }
1719
1720 /// Attribute value is missing after `=`.
1721 #[test]
1722 fn missed_value() {
1723 let mut iter = Attributes::new(r#"tag key= regular='attribute'"#, 3);
1724 // 0 ^ = 9
1725
1726 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1727 // Because we do not check validity of keys and values during parsing,
1728 // "error='recovery'" is considered, as unquoted attribute value and
1729 // skipped during recovery and iteration finished
1730 assert_eq!(iter.next(), None);
1731 assert_eq!(iter.next(), None);
1732
1733 ////////////////////////////////////////////////////////////////////
1734
1735 let mut iter = Attributes::new(r#"tag key= regular= 'attribute'"#, 3);
1736 // 0 ^ = 9 ^ = 29
1737
1738 // In that case "regular=" considered as unquoted value
1739 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1740 // In that case "'attribute'" considered as a key, because we do not check
1741 // validity of key names
1742 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
1743 assert_eq!(iter.next(), None);
1744 assert_eq!(iter.next(), None);
1745
1746 ////////////////////////////////////////////////////////////////////
1747
1748 let mut iter = Attributes::new(r#"tag key= regular ='attribute'"#, 3);
1749 // 0 ^ = 9 ^ = 29
1750
1751 // In that case "regular" considered as unquoted value
1752 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1753 // In that case "='attribute'" considered as a key, because we do not check
1754 // validity of key names
1755 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
1756 assert_eq!(iter.next(), None);
1757 assert_eq!(iter.next(), None);
1758
1759 ////////////////////////////////////////////////////////////////////
1760
1761 let mut iter = Attributes::new(r#"tag key= regular = 'attribute'"#, 3);
1762 // 0 ^ = 9 ^ = 19 ^ = 30
1763
1764 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
1765 // In that case second "=" considered as a key, because we do not check
1766 // validity of key names
1767 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(19))));
1768 // In that case "'attribute'" considered as a key, because we do not check
1769 // validity of key names
1770 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(30))));
1771 assert_eq!(iter.next(), None);
1772 assert_eq!(iter.next(), None);
1773 }
1774 }
1775
1776 /// Copy of single, but with additional spaces in markup
1777 mod sparsed {
1778 use super::*;
1779 use pretty_assertions::assert_eq;
1780
1781 /// Attribute have a value enclosed in single quotes
1782 #[test]
1783 fn single_quoted() {
1784 let mut iter = Attributes::new(r#"tag key = 'value' "#, 3);
1785
1786 assert_eq!(
1787 iter.next(),
1788 Some(Ok(Attribute {
1789 key: QName(b"key"),
1790 value: Cow::Borrowed(b"value"),
1791 }))
1792 );
1793 assert_eq!(iter.next(), None);
1794 assert_eq!(iter.next(), None);
1795 }
1796
1797 /// Attribute have a value enclosed in double quotes
1798 #[test]
1799 fn double_quoted() {
1800 let mut iter = Attributes::new(r#"tag key = "value" "#, 3);
1801
1802 assert_eq!(
1803 iter.next(),
1804 Some(Ok(Attribute {
1805 key: QName(b"key"),
1806 value: Cow::Borrowed(b"value"),
1807 }))
1808 );
1809 assert_eq!(iter.next(), None);
1810 assert_eq!(iter.next(), None);
1811 }
1812
1813 /// Attribute have a value, not enclosed in quotes
1814 #[test]
1815 fn unquoted() {
1816 let mut iter = Attributes::new(r#"tag key = value "#, 3);
1817 // 0 ^ = 10
1818
1819 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(10))));
1820 assert_eq!(iter.next(), None);
1821 assert_eq!(iter.next(), None);
1822 }
1823
1824 /// Only attribute key is present
1825 #[test]
1826 fn key_only() {
1827 let mut iter = Attributes::new(r#"tag key "#, 3);
1828 // 0 ^ = 8
1829
1830 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
1831 assert_eq!(iter.next(), None);
1832 assert_eq!(iter.next(), None);
1833 }
1834
1835 /// Key is started with an invalid symbol (a single quote in this test).
1836 /// Because we do not check validity of keys and values during parsing,
1837 /// that invalid attribute will be returned
1838 #[test]
1839 fn key_start_invalid() {
1840 let mut iter = Attributes::new(r#"tag 'key' = 'value' "#, 3);
1841
1842 assert_eq!(
1843 iter.next(),
1844 Some(Ok(Attribute {
1845 key: QName(b"'key'"),
1846 value: Cow::Borrowed(b"value"),
1847 }))
1848 );
1849 assert_eq!(iter.next(), None);
1850 assert_eq!(iter.next(), None);
1851 }
1852
/// The key contains a symbol that is invalid in a name (an ampersand in this
/// test). Keys and values are not validated during parsing, so the attribute
/// is yielded with the key taken as is
#[test]
fn key_contains_invalid() {
    let source = r#"tag key&jey = 'value' "#;
    let mut attrs = Attributes::new(source, 3);

    let expected = Attribute {
        key: QName(b"key&jey"),
        value: Cow::Borrowed(b"value"),
    };
    assert_eq!(attrs.next(), Some(Ok(expected)));

    // Once exhausted, the iterator keeps returning `None`
    assert_eq!(attrs.next(), None);
    assert_eq!(attrs.next(), None);
}
1870
/// Nothing follows the `=` sign. The missing value is reported at the
/// position where the input ends
#[test]
fn missed_value() {
    let source = r#"tag key = "#;
    //              0         ^ = 10
    let mut attrs = Attributes::new(source, 3);

    let expected = AttrError::ExpectedValue(10);
    assert_eq!(attrs.next(), Some(Err(expected)));

    // Nothing else is yielded after the error
    assert_eq!(attrs.next(), None);
    assert_eq!(attrs.next(), None);
}
1881 }
1882
1883 /// Checks that duplicated attributes correctly reported and recovering is
1884 /// possible after that
1885 mod duplicated {
1886 use super::*;
1887
1888 mod with_check {
1889 use super::*;
1890 use pretty_assertions::assert_eq;
1891
1892 /// Attribute have a value enclosed in single quotes
1893 #[test]
1894 fn single_quoted() {
1895 let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
1896 // 0 ^ = 4 ^ = 16
1897
1898 assert_eq!(
1899 iter.next(),
1900 Some(Ok(Attribute {
1901 key: QName(b"key"),
1902 value: Cow::Borrowed(b"value"),
1903 }))
1904 );
1905 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
1906 assert_eq!(
1907 iter.next(),
1908 Some(Ok(Attribute {
1909 key: QName(b"another"),
1910 value: Cow::Borrowed(b""),
1911 }))
1912 );
1913 assert_eq!(iter.next(), None);
1914 assert_eq!(iter.next(), None);
1915 }
1916
1917 /// Attribute have a value enclosed in double quotes
1918 #[test]
1919 fn double_quoted() {
1920 let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
1921 // 0 ^ = 4 ^ = 16
1922
1923 assert_eq!(
1924 iter.next(),
1925 Some(Ok(Attribute {
1926 key: QName(b"key"),
1927 value: Cow::Borrowed(b"value"),
1928 }))
1929 );
1930 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
1931 assert_eq!(
1932 iter.next(),
1933 Some(Ok(Attribute {
1934 key: QName(b"another"),
1935 value: Cow::Borrowed(b""),
1936 }))
1937 );
1938 assert_eq!(iter.next(), None);
1939 assert_eq!(iter.next(), None);
1940 }
1941
1942 /// Attribute have a value, not enclosed in quotes
1943 #[test]
1944 fn unquoted() {
1945 let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
1946 // 0 ^ = 4 ^ = 16
1947
1948 assert_eq!(
1949 iter.next(),
1950 Some(Ok(Attribute {
1951 key: QName(b"key"),
1952 value: Cow::Borrowed(b"value"),
1953 }))
1954 );
1955 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
1956 assert_eq!(
1957 iter.next(),
1958 Some(Ok(Attribute {
1959 key: QName(b"another"),
1960 value: Cow::Borrowed(b""),
1961 }))
1962 );
1963 assert_eq!(iter.next(), None);
1964 assert_eq!(iter.next(), None);
1965 }
1966
1967 /// Only attribute key is present
1968 #[test]
1969 fn key_only() {
1970 let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
1971 // 0 ^ = 20
1972
1973 assert_eq!(
1974 iter.next(),
1975 Some(Ok(Attribute {
1976 key: QName(b"key"),
1977 value: Cow::Borrowed(b"value"),
1978 }))
1979 );
1980 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
1981 assert_eq!(
1982 iter.next(),
1983 Some(Ok(Attribute {
1984 key: QName(b"another"),
1985 value: Cow::Borrowed(b""),
1986 }))
1987 );
1988 assert_eq!(iter.next(), None);
1989 assert_eq!(iter.next(), None);
1990 }
1991 }
1992
1993 /// Check for duplicated names is disabled
1994 mod without_check {
1995 use super::*;
1996 use pretty_assertions::assert_eq;
1997
1998 /// Attribute have a value enclosed in single quotes
1999 #[test]
2000 fn single_quoted() {
2001 let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
2002 iter.with_checks(false);
2003
2004 assert_eq!(
2005 iter.next(),
2006 Some(Ok(Attribute {
2007 key: QName(b"key"),
2008 value: Cow::Borrowed(b"value"),
2009 }))
2010 );
2011 assert_eq!(
2012 iter.next(),
2013 Some(Ok(Attribute {
2014 key: QName(b"key"),
2015 value: Cow::Borrowed(b"dup"),
2016 }))
2017 );
2018 assert_eq!(
2019 iter.next(),
2020 Some(Ok(Attribute {
2021 key: QName(b"another"),
2022 value: Cow::Borrowed(b""),
2023 }))
2024 );
2025 assert_eq!(iter.next(), None);
2026 assert_eq!(iter.next(), None);
2027 }
2028
2029 /// Attribute have a value enclosed in double quotes
2030 #[test]
2031 fn double_quoted() {
2032 let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
2033 iter.with_checks(false);
2034
2035 assert_eq!(
2036 iter.next(),
2037 Some(Ok(Attribute {
2038 key: QName(b"key"),
2039 value: Cow::Borrowed(b"value"),
2040 }))
2041 );
2042 assert_eq!(
2043 iter.next(),
2044 Some(Ok(Attribute {
2045 key: QName(b"key"),
2046 value: Cow::Borrowed(b"dup"),
2047 }))
2048 );
2049 assert_eq!(
2050 iter.next(),
2051 Some(Ok(Attribute {
2052 key: QName(b"another"),
2053 value: Cow::Borrowed(b""),
2054 }))
2055 );
2056 assert_eq!(iter.next(), None);
2057 assert_eq!(iter.next(), None);
2058 }
2059
2060 /// Attribute have a value, not enclosed in quotes
2061 #[test]
2062 fn unquoted() {
2063 let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
2064 // 0 ^ = 20
2065 iter.with_checks(false);
2066
2067 assert_eq!(
2068 iter.next(),
2069 Some(Ok(Attribute {
2070 key: QName(b"key"),
2071 value: Cow::Borrowed(b"value"),
2072 }))
2073 );
2074 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(20))));
2075 assert_eq!(
2076 iter.next(),
2077 Some(Ok(Attribute {
2078 key: QName(b"another"),
2079 value: Cow::Borrowed(b""),
2080 }))
2081 );
2082 assert_eq!(iter.next(), None);
2083 assert_eq!(iter.next(), None);
2084 }
2085
2086 /// Only attribute key is present
2087 #[test]
2088 fn key_only() {
2089 let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
2090 // 0 ^ = 20
2091 iter.with_checks(false);
2092
2093 assert_eq!(
2094 iter.next(),
2095 Some(Ok(Attribute {
2096 key: QName(b"key"),
2097 value: Cow::Borrowed(b"value"),
2098 }))
2099 );
2100 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
2101 assert_eq!(
2102 iter.next(),
2103 Some(Ok(Attribute {
2104 key: QName(b"another"),
2105 value: Cow::Borrowed(b""),
2106 }))
2107 );
2108 assert_eq!(iter.next(), None);
2109 assert_eq!(iter.next(), None);
2110 }
2111 }
2112 }
2113
#[test]
fn mixed_quote() {
    /// Builds the attribute that the iterator is expected to yield
    /// for the given raw key and raw value
    fn attr<'a>(key: &'a [u8], value: &'a [u8]) -> Attribute<'a> {
        Attribute {
            key: QName(key),
            value: Cow::Borrowed(value),
        }
    }

    // Both kinds of quotes are used as delimiters, and each kind also
    // appears inside a value delimited by the other kind
    let source = r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#;
    let mut attrs = Attributes::new(source, 3);

    assert_eq!(attrs.next(), Some(Ok(attr(b"a", b"a"))));
    assert_eq!(attrs.next(), Some(Ok(attr(b"b", b"b"))));
    assert_eq!(attrs.next(), Some(Ok(attr(b"c", br#"cc"cc"#))));
    assert_eq!(attrs.next(), Some(Ok(attr(b"d", b"dd'dd"))));

    // Once exhausted, the iterator keeps returning `None`
    assert_eq!(attrs.next(), None);
    assert_eq!(attrs.next(), None);
}
2149}
2150
/// Checks how parsing of HTML-style attributes works. Each attribute can be
/// in one of three forms:
/// - XML-like: has a value enclosed in single or double quotes
/// - has a value that is not enclosed in quotes
/// - without a value, key only
2156#[cfg(test)]
2157mod html {
2158 use super::*;
2159 use pretty_assertions::assert_eq;
2160
2161 /// Checked attribute is the single attribute
2162 mod single {
2163 use super::*;
2164 use pretty_assertions::assert_eq;
2165
2166 /// Attribute have a value enclosed in single quotes
2167 #[test]
2168 fn single_quoted() {
2169 let mut iter = Attributes::html(r#"tag key='value'"#, 3);
2170
2171 assert_eq!(
2172 iter.next(),
2173 Some(Ok(Attribute {
2174 key: QName(b"key"),
2175 value: Cow::Borrowed(b"value"),
2176 }))
2177 );
2178 assert_eq!(iter.next(), None);
2179 assert_eq!(iter.next(), None);
2180 }
2181
2182 /// Attribute have a value enclosed in double quotes
2183 #[test]
2184 fn double_quoted() {
2185 let mut iter = Attributes::html(r#"tag key="value""#, 3);
2186
2187 assert_eq!(
2188 iter.next(),
2189 Some(Ok(Attribute {
2190 key: QName(b"key"),
2191 value: Cow::Borrowed(b"value"),
2192 }))
2193 );
2194 assert_eq!(iter.next(), None);
2195 assert_eq!(iter.next(), None);
2196 }
2197
2198 /// Attribute have a value, not enclosed in quotes
2199 #[test]
2200 fn unquoted() {
2201 let mut iter = Attributes::html(r#"tag key=value"#, 3);
2202
2203 assert_eq!(
2204 iter.next(),
2205 Some(Ok(Attribute {
2206 key: QName(b"key"),
2207 value: Cow::Borrowed(b"value"),
2208 }))
2209 );
2210 assert_eq!(iter.next(), None);
2211 assert_eq!(iter.next(), None);
2212 }
2213
2214 /// Only attribute key is present
2215 #[test]
2216 fn key_only() {
2217 let mut iter = Attributes::html(r#"tag key"#, 3);
2218
2219 assert_eq!(
2220 iter.next(),
2221 Some(Ok(Attribute {
2222 key: QName(b"key"),
2223 value: Cow::Borrowed(&[]),
2224 }))
2225 );
2226 assert_eq!(iter.next(), None);
2227 assert_eq!(iter.next(), None);
2228 }
2229
2230 /// Key is started with an invalid symbol (a single quote in this test).
2231 /// Because we do not check validity of keys and values during parsing,
2232 /// that invalid attribute will be returned
2233 #[test]
2234 fn key_start_invalid() {
2235 let mut iter = Attributes::html(r#"tag 'key'='value'"#, 3);
2236
2237 assert_eq!(
2238 iter.next(),
2239 Some(Ok(Attribute {
2240 key: QName(b"'key'"),
2241 value: Cow::Borrowed(b"value"),
2242 }))
2243 );
2244 assert_eq!(iter.next(), None);
2245 assert_eq!(iter.next(), None);
2246 }
2247
2248 /// Key contains an invalid symbol (an ampersand in this test).
2249 /// Because we do not check validity of keys and values during parsing,
2250 /// that invalid attribute will be returned
2251 #[test]
2252 fn key_contains_invalid() {
2253 let mut iter = Attributes::html(r#"tag key&jey='value'"#, 3);
2254
2255 assert_eq!(
2256 iter.next(),
2257 Some(Ok(Attribute {
2258 key: QName(b"key&jey"),
2259 value: Cow::Borrowed(b"value"),
2260 }))
2261 );
2262 assert_eq!(iter.next(), None);
2263 assert_eq!(iter.next(), None);
2264 }
2265
2266 /// Attribute value is missing after `=`
2267 #[test]
2268 fn missed_value() {
2269 let mut iter = Attributes::html(r#"tag key="#, 3);
2270 // 0 ^ = 8
2271
2272 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8))));
2273 assert_eq!(iter.next(), None);
2274 assert_eq!(iter.next(), None);
2275 }
2276 }
2277
2278 /// Checked attribute is the first attribute in the list of many attributes
2279 mod first {
2280 use super::*;
2281 use pretty_assertions::assert_eq;
2282
2283 /// Attribute have a value enclosed in single quotes
2284 #[test]
2285 fn single_quoted() {
2286 let mut iter = Attributes::html(r#"tag key='value' regular='attribute'"#, 3);
2287
2288 assert_eq!(
2289 iter.next(),
2290 Some(Ok(Attribute {
2291 key: QName(b"key"),
2292 value: Cow::Borrowed(b"value"),
2293 }))
2294 );
2295 assert_eq!(
2296 iter.next(),
2297 Some(Ok(Attribute {
2298 key: QName(b"regular"),
2299 value: Cow::Borrowed(b"attribute"),
2300 }))
2301 );
2302 assert_eq!(iter.next(), None);
2303 assert_eq!(iter.next(), None);
2304 }
2305
2306 /// Attribute have a value enclosed in double quotes
2307 #[test]
2308 fn double_quoted() {
2309 let mut iter = Attributes::html(r#"tag key="value" regular='attribute'"#, 3);
2310
2311 assert_eq!(
2312 iter.next(),
2313 Some(Ok(Attribute {
2314 key: QName(b"key"),
2315 value: Cow::Borrowed(b"value"),
2316 }))
2317 );
2318 assert_eq!(
2319 iter.next(),
2320 Some(Ok(Attribute {
2321 key: QName(b"regular"),
2322 value: Cow::Borrowed(b"attribute"),
2323 }))
2324 );
2325 assert_eq!(iter.next(), None);
2326 assert_eq!(iter.next(), None);
2327 }
2328
2329 /// Attribute have a value, not enclosed in quotes
2330 #[test]
2331 fn unquoted() {
2332 let mut iter = Attributes::html(r#"tag key=value regular='attribute'"#, 3);
2333
2334 assert_eq!(
2335 iter.next(),
2336 Some(Ok(Attribute {
2337 key: QName(b"key"),
2338 value: Cow::Borrowed(b"value"),
2339 }))
2340 );
2341 assert_eq!(
2342 iter.next(),
2343 Some(Ok(Attribute {
2344 key: QName(b"regular"),
2345 value: Cow::Borrowed(b"attribute"),
2346 }))
2347 );
2348 assert_eq!(iter.next(), None);
2349 assert_eq!(iter.next(), None);
2350 }
2351
2352 /// Only attribute key is present
2353 #[test]
2354 fn key_only() {
2355 let mut iter = Attributes::html(r#"tag key regular='attribute'"#, 3);
2356
2357 assert_eq!(
2358 iter.next(),
2359 Some(Ok(Attribute {
2360 key: QName(b"key"),
2361 value: Cow::Borrowed(&[]),
2362 }))
2363 );
2364 assert_eq!(
2365 iter.next(),
2366 Some(Ok(Attribute {
2367 key: QName(b"regular"),
2368 value: Cow::Borrowed(b"attribute"),
2369 }))
2370 );
2371 assert_eq!(iter.next(), None);
2372 assert_eq!(iter.next(), None);
2373 }
2374
2375 /// Key is started with an invalid symbol (a single quote in this test).
2376 /// Because we do not check validity of keys and values during parsing,
2377 /// that invalid attribute will be returned
2378 #[test]
2379 fn key_start_invalid() {
2380 let mut iter = Attributes::html(r#"tag 'key'='value' regular='attribute'"#, 3);
2381
2382 assert_eq!(
2383 iter.next(),
2384 Some(Ok(Attribute {
2385 key: QName(b"'key'"),
2386 value: Cow::Borrowed(b"value"),
2387 }))
2388 );
2389 assert_eq!(
2390 iter.next(),
2391 Some(Ok(Attribute {
2392 key: QName(b"regular"),
2393 value: Cow::Borrowed(b"attribute"),
2394 }))
2395 );
2396 assert_eq!(iter.next(), None);
2397 assert_eq!(iter.next(), None);
2398 }
2399
2400 /// Key contains an invalid symbol (an ampersand in this test).
2401 /// Because we do not check validity of keys and values during parsing,
2402 /// that invalid attribute will be returned
2403 #[test]
2404 fn key_contains_invalid() {
2405 let mut iter = Attributes::html(r#"tag key&jey='value' regular='attribute'"#, 3);
2406
2407 assert_eq!(
2408 iter.next(),
2409 Some(Ok(Attribute {
2410 key: QName(b"key&jey"),
2411 value: Cow::Borrowed(b"value"),
2412 }))
2413 );
2414 assert_eq!(
2415 iter.next(),
2416 Some(Ok(Attribute {
2417 key: QName(b"regular"),
2418 value: Cow::Borrowed(b"attribute"),
2419 }))
2420 );
2421 assert_eq!(iter.next(), None);
2422 assert_eq!(iter.next(), None);
2423 }
2424
2425 /// Attribute value is missing after `=`
2426 #[test]
2427 fn missed_value() {
2428 let mut iter = Attributes::html(r#"tag key= regular='attribute'"#, 3);
2429
2430 // Because we do not check validity of keys and values during parsing,
2431 // "regular='attribute'" is considered as unquoted attribute value
2432 assert_eq!(
2433 iter.next(),
2434 Some(Ok(Attribute {
2435 key: QName(b"key"),
2436 value: Cow::Borrowed(b"regular='attribute'"),
2437 }))
2438 );
2439 assert_eq!(iter.next(), None);
2440 assert_eq!(iter.next(), None);
2441
2442 ////////////////////////////////////////////////////////////////////
2443
2444 let mut iter = Attributes::html(r#"tag key= regular= 'attribute'"#, 3);
2445
2446 // Because we do not check validity of keys and values during parsing,
2447 // "regular=" is considered as unquoted attribute value
2448 assert_eq!(
2449 iter.next(),
2450 Some(Ok(Attribute {
2451 key: QName(b"key"),
2452 value: Cow::Borrowed(b"regular="),
2453 }))
2454 );
2455 // Because we do not check validity of keys and values during parsing,
2456 // "'attribute'" is considered as key-only attribute
2457 assert_eq!(
2458 iter.next(),
2459 Some(Ok(Attribute {
2460 key: QName(b"'attribute'"),
2461 value: Cow::Borrowed(&[]),
2462 }))
2463 );
2464 assert_eq!(iter.next(), None);
2465 assert_eq!(iter.next(), None);
2466
2467 ////////////////////////////////////////////////////////////////////
2468
2469 let mut iter = Attributes::html(r#"tag key= regular ='attribute'"#, 3);
2470
2471 // Because we do not check validity of keys and values during parsing,
2472 // "regular" is considered as unquoted attribute value
2473 assert_eq!(
2474 iter.next(),
2475 Some(Ok(Attribute {
2476 key: QName(b"key"),
2477 value: Cow::Borrowed(b"regular"),
2478 }))
2479 );
2480 // Because we do not check validity of keys and values during parsing,
2481 // "='attribute'" is considered as key-only attribute
2482 assert_eq!(
2483 iter.next(),
2484 Some(Ok(Attribute {
2485 key: QName(b"='attribute'"),
2486 value: Cow::Borrowed(&[]),
2487 }))
2488 );
2489 assert_eq!(iter.next(), None);
2490 assert_eq!(iter.next(), None);
2491
2492 ////////////////////////////////////////////////////////////////////
2493
2494 let mut iter = Attributes::html(r#"tag key= regular = 'attribute'"#, 3);
2495 // 0 ^ = 9 ^ = 19 ^ = 30
2496
2497 // Because we do not check validity of keys and values during parsing,
2498 // "regular" is considered as unquoted attribute value
2499 assert_eq!(
2500 iter.next(),
2501 Some(Ok(Attribute {
2502 key: QName(b"key"),
2503 value: Cow::Borrowed(b"regular"),
2504 }))
2505 );
2506 // Because we do not check validity of keys and values during parsing,
2507 // "=" is considered as key-only attribute
2508 assert_eq!(
2509 iter.next(),
2510 Some(Ok(Attribute {
2511 key: QName(b"="),
2512 value: Cow::Borrowed(&[]),
2513 }))
2514 );
2515 // Because we do not check validity of keys and values during parsing,
2516 // "'attribute'" is considered as key-only attribute
2517 assert_eq!(
2518 iter.next(),
2519 Some(Ok(Attribute {
2520 key: QName(b"'attribute'"),
2521 value: Cow::Borrowed(&[]),
2522 }))
2523 );
2524 assert_eq!(iter.next(), None);
2525 assert_eq!(iter.next(), None);
2526 }
2527 }
2528
2529 /// Copy of single, but with additional spaces in markup
2530 mod sparsed {
2531 use super::*;
2532 use pretty_assertions::assert_eq;
2533
2534 /// Attribute have a value enclosed in single quotes
2535 #[test]
2536 fn single_quoted() {
2537 let mut iter = Attributes::html(r#"tag key = 'value' "#, 3);
2538
2539 assert_eq!(
2540 iter.next(),
2541 Some(Ok(Attribute {
2542 key: QName(b"key"),
2543 value: Cow::Borrowed(b"value"),
2544 }))
2545 );
2546 assert_eq!(iter.next(), None);
2547 assert_eq!(iter.next(), None);
2548 }
2549
2550 /// Attribute have a value enclosed in double quotes
2551 #[test]
2552 fn double_quoted() {
2553 let mut iter = Attributes::html(r#"tag key = "value" "#, 3);
2554
2555 assert_eq!(
2556 iter.next(),
2557 Some(Ok(Attribute {
2558 key: QName(b"key"),
2559 value: Cow::Borrowed(b"value"),
2560 }))
2561 );
2562 assert_eq!(iter.next(), None);
2563 assert_eq!(iter.next(), None);
2564 }
2565
2566 /// Attribute have a value, not enclosed in quotes
2567 #[test]
2568 fn unquoted() {
2569 let mut iter = Attributes::html(r#"tag key = value "#, 3);
2570
2571 assert_eq!(
2572 iter.next(),
2573 Some(Ok(Attribute {
2574 key: QName(b"key"),
2575 value: Cow::Borrowed(b"value"),
2576 }))
2577 );
2578 assert_eq!(iter.next(), None);
2579 assert_eq!(iter.next(), None);
2580 }
2581
2582 /// Only attribute key is present
2583 #[test]
2584 fn key_only() {
2585 let mut iter = Attributes::html(r#"tag key "#, 3);
2586
2587 assert_eq!(
2588 iter.next(),
2589 Some(Ok(Attribute {
2590 key: QName(b"key"),
2591 value: Cow::Borrowed(&[]),
2592 }))
2593 );
2594 assert_eq!(iter.next(), None);
2595 assert_eq!(iter.next(), None);
2596 }
2597
2598 /// Key is started with an invalid symbol (a single quote in this test).
2599 /// Because we do not check validity of keys and values during parsing,
2600 /// that invalid attribute will be returned
2601 #[test]
2602 fn key_start_invalid() {
2603 let mut iter = Attributes::html(r#"tag 'key' = 'value' "#, 3);
2604
2605 assert_eq!(
2606 iter.next(),
2607 Some(Ok(Attribute {
2608 key: QName(b"'key'"),
2609 value: Cow::Borrowed(b"value"),
2610 }))
2611 );
2612 assert_eq!(iter.next(), None);
2613 assert_eq!(iter.next(), None);
2614 }
2615
2616 /// Key contains an invalid symbol (an ampersand in this test).
2617 /// Because we do not check validity of keys and values during parsing,
2618 /// that invalid attribute will be returned
2619 #[test]
2620 fn key_contains_invalid() {
2621 let mut iter = Attributes::html(r#"tag key&jey = 'value' "#, 3);
2622
2623 assert_eq!(
2624 iter.next(),
2625 Some(Ok(Attribute {
2626 key: QName(b"key&jey"),
2627 value: Cow::Borrowed(b"value"),
2628 }))
2629 );
2630 assert_eq!(iter.next(), None);
2631 assert_eq!(iter.next(), None);
2632 }
2633
2634 /// Attribute value is missing after `=`
2635 #[test]
2636 fn missed_value() {
2637 let mut iter = Attributes::html(r#"tag key = "#, 3);
2638 // 0 ^ = 10
2639
2640 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10))));
2641 assert_eq!(iter.next(), None);
2642 assert_eq!(iter.next(), None);
2643 }
2644 }
2645
2646 /// Checks that duplicated attributes correctly reported and recovering is
2647 /// possible after that
2648 mod duplicated {
2649 use super::*;
2650
2651 mod with_check {
2652 use super::*;
2653 use pretty_assertions::assert_eq;
2654
2655 /// Attribute have a value enclosed in single quotes
2656 #[test]
2657 fn single_quoted() {
2658 let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
2659 // 0 ^ = 4 ^ = 16
2660
2661 assert_eq!(
2662 iter.next(),
2663 Some(Ok(Attribute {
2664 key: QName(b"key"),
2665 value: Cow::Borrowed(b"value"),
2666 }))
2667 );
2668 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2669 assert_eq!(
2670 iter.next(),
2671 Some(Ok(Attribute {
2672 key: QName(b"another"),
2673 value: Cow::Borrowed(b""),
2674 }))
2675 );
2676 assert_eq!(iter.next(), None);
2677 assert_eq!(iter.next(), None);
2678 }
2679
2680 /// Attribute have a value enclosed in double quotes
2681 #[test]
2682 fn double_quoted() {
2683 let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
2684 // 0 ^ = 4 ^ = 16
2685
2686 assert_eq!(
2687 iter.next(),
2688 Some(Ok(Attribute {
2689 key: QName(b"key"),
2690 value: Cow::Borrowed(b"value"),
2691 }))
2692 );
2693 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2694 assert_eq!(
2695 iter.next(),
2696 Some(Ok(Attribute {
2697 key: QName(b"another"),
2698 value: Cow::Borrowed(b""),
2699 }))
2700 );
2701 assert_eq!(iter.next(), None);
2702 assert_eq!(iter.next(), None);
2703 }
2704
2705 /// Attribute have a value, not enclosed in quotes
2706 #[test]
2707 fn unquoted() {
2708 let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
2709 // 0 ^ = 4 ^ = 16
2710
2711 assert_eq!(
2712 iter.next(),
2713 Some(Ok(Attribute {
2714 key: QName(b"key"),
2715 value: Cow::Borrowed(b"value"),
2716 }))
2717 );
2718 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2719 assert_eq!(
2720 iter.next(),
2721 Some(Ok(Attribute {
2722 key: QName(b"another"),
2723 value: Cow::Borrowed(b""),
2724 }))
2725 );
2726 assert_eq!(iter.next(), None);
2727 assert_eq!(iter.next(), None);
2728 }
2729
2730 /// Only attribute key is present
2731 #[test]
2732 fn key_only() {
2733 let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3);
2734 // 0 ^ = 4 ^ = 16
2735
2736 assert_eq!(
2737 iter.next(),
2738 Some(Ok(Attribute {
2739 key: QName(b"key"),
2740 value: Cow::Borrowed(b"value"),
2741 }))
2742 );
2743 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2744 assert_eq!(
2745 iter.next(),
2746 Some(Ok(Attribute {
2747 key: QName(b"another"),
2748 value: Cow::Borrowed(b""),
2749 }))
2750 );
2751 assert_eq!(iter.next(), None);
2752 assert_eq!(iter.next(), None);
2753 }
2754 }
2755
2756 /// Check for duplicated names is disabled
2757 mod without_check {
2758 use super::*;
2759 use pretty_assertions::assert_eq;
2760
2761 /// Attribute have a value enclosed in single quotes
2762 #[test]
2763 fn single_quoted() {
2764 let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
2765 iter.with_checks(false);
2766
2767 assert_eq!(
2768 iter.next(),
2769 Some(Ok(Attribute {
2770 key: QName(b"key"),
2771 value: Cow::Borrowed(b"value"),
2772 }))
2773 );
2774 assert_eq!(
2775 iter.next(),
2776 Some(Ok(Attribute {
2777 key: QName(b"key"),
2778 value: Cow::Borrowed(b"dup"),
2779 }))
2780 );
2781 assert_eq!(
2782 iter.next(),
2783 Some(Ok(Attribute {
2784 key: QName(b"another"),
2785 value: Cow::Borrowed(b""),
2786 }))
2787 );
2788 assert_eq!(iter.next(), None);
2789 assert_eq!(iter.next(), None);
2790 }
2791
2792 /// Attribute have a value enclosed in double quotes
2793 #[test]
2794 fn double_quoted() {
2795 let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
2796 iter.with_checks(false);
2797
2798 assert_eq!(
2799 iter.next(),
2800 Some(Ok(Attribute {
2801 key: QName(b"key"),
2802 value: Cow::Borrowed(b"value"),
2803 }))
2804 );
2805 assert_eq!(
2806 iter.next(),
2807 Some(Ok(Attribute {
2808 key: QName(b"key"),
2809 value: Cow::Borrowed(b"dup"),
2810 }))
2811 );
2812 assert_eq!(
2813 iter.next(),
2814 Some(Ok(Attribute {
2815 key: QName(b"another"),
2816 value: Cow::Borrowed(b""),
2817 }))
2818 );
2819 assert_eq!(iter.next(), None);
2820 assert_eq!(iter.next(), None);
2821 }
2822
2823 /// Attribute have a value, not enclosed in quotes
2824 #[test]
2825 fn unquoted() {
2826 let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
2827 iter.with_checks(false);
2828
2829 assert_eq!(
2830 iter.next(),
2831 Some(Ok(Attribute {
2832 key: QName(b"key"),
2833 value: Cow::Borrowed(b"value"),
2834 }))
2835 );
2836 assert_eq!(
2837 iter.next(),
2838 Some(Ok(Attribute {
2839 key: QName(b"key"),
2840 value: Cow::Borrowed(b"dup"),
2841 }))
2842 );
2843 assert_eq!(
2844 iter.next(),
2845 Some(Ok(Attribute {
2846 key: QName(b"another"),
2847 value: Cow::Borrowed(b""),
2848 }))
2849 );
2850 assert_eq!(iter.next(), None);
2851 assert_eq!(iter.next(), None);
2852 }
2853
2854 /// Only attribute key is present
2855 #[test]
2856 fn key_only() {
2857 let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3);
2858 iter.with_checks(false);
2859
2860 assert_eq!(
2861 iter.next(),
2862 Some(Ok(Attribute {
2863 key: QName(b"key"),
2864 value: Cow::Borrowed(b"value"),
2865 }))
2866 );
2867 assert_eq!(
2868 iter.next(),
2869 Some(Ok(Attribute {
2870 key: QName(b"key"),
2871 value: Cow::Borrowed(&[]),
2872 }))
2873 );
2874 assert_eq!(
2875 iter.next(),
2876 Some(Ok(Attribute {
2877 key: QName(b"another"),
2878 value: Cow::Borrowed(b""),
2879 }))
2880 );
2881 assert_eq!(iter.next(), None);
2882 assert_eq!(iter.next(), None);
2883 }
2884 }
2885 }
2886
#[test]
fn mixed_quote() {
    /// Builds the attribute that the iterator is expected to yield
    /// for the given raw key and raw value
    fn attr<'a>(key: &'a [u8], value: &'a [u8]) -> Attribute<'a> {
        Attribute {
            key: QName(key),
            value: Cow::Borrowed(value),
        }
    }

    // Both kinds of quotes are used as delimiters, and each kind also
    // appears inside a value delimited by the other kind
    let source = r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#;
    let mut attrs = Attributes::html(source, 3);

    assert_eq!(attrs.next(), Some(Ok(attr(b"a", b"a"))));
    assert_eq!(attrs.next(), Some(Ok(attr(b"b", b"b"))));
    assert_eq!(attrs.next(), Some(Ok(attr(b"c", br#"cc"cc"#))));
    assert_eq!(attrs.next(), Some(Ok(attr(b"d", b"dd'dd"))));

    // Once exhausted, the iterator keeps returning `None`
    assert_eq!(attrs.next(), None);
    assert_eq!(attrs.next(), None);
}
2922}