quick_xml/events/mod.rs
1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52 escape, minimal_escape, normalize_xml10_eols, normalize_xml11_eols, parse_number,
53 partial_escape, EscapeError,
54};
55use crate::name::{LocalName, QName};
56use crate::utils::{self, name_len, trim_xml_end, trim_xml_start, write_cow_string};
57use crate::XmlVersion;
58use attributes::{AttrError, Attribute, Attributes};
59
60/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
61///
62/// The name can be accessed using the [`name`] or [`local_name`] methods.
63/// An iterator over the attributes is returned by the [`attributes`] method.
64///
65/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
66/// returns the content of this event between `<` and `>` or `/>`:
67///
68/// ```
69/// # use quick_xml::events::{BytesStart, Event};
70/// # use quick_xml::reader::Reader;
71/// # use pretty_assertions::assert_eq;
72/// // Remember, that \ at the end of string literal strips
73/// // all space characters to the first non-space character
74/// let mut reader = Reader::from_str("\
75/// <element a1 = 'val1' a2=\"val2\" />\
76/// <element a1 = 'val1' a2=\"val2\" >"
77/// );
78/// let content = "element a1 = 'val1' a2=\"val2\" ";
79/// let event = BytesStart::from_content(content, 7);
80///
81/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
82/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
83/// // deref coercion of &BytesStart to &[u8]
84/// assert_eq!(&event as &[u8], content.as_bytes());
85/// // AsRef<[u8]> for &T + deref coercion
86/// assert_eq!(event.as_ref(), content.as_bytes());
87/// ```
88///
89/// [`name`]: Self::name
90/// [`local_name`]: Self::local_name
91/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (name followed by the raw attributes), before
    /// any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// End of the element name; the name starts at the start of `buf`
    pub(crate) name_len: usize,
    /// Encoding used for `buf`
    decoder: Decoder,
}
101
102impl<'a> BytesStart<'a> {
103 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
104 #[inline]
105 pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
106 BytesStart {
107 buf: Cow::Borrowed(content),
108 name_len,
109 decoder,
110 }
111 }
112
113 /// Creates a new `BytesStart` from the given name.
114 ///
115 /// # Warning
116 ///
117 /// `name` must be a valid name.
118 #[inline]
119 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
120 let buf = str_cow_to_bytes(name);
121 BytesStart {
122 name_len: buf.len(),
123 buf,
124 decoder: Decoder::utf8(),
125 }
126 }
127
128 /// Creates a new `BytesStart` from the given content (name + attributes).
129 ///
130 /// # Warning
131 ///
132 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
133 /// must be correctly-formed attributes. Neither are checked, it is possible
134 /// to generate invalid XML if `content` or `name_len` are incorrect.
135 #[inline]
136 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
137 BytesStart {
138 buf: str_cow_to_bytes(content),
139 name_len,
140 decoder: Decoder::utf8(),
141 }
142 }
143
144 /// Converts the event into an owned event.
145 pub fn into_owned(self) -> BytesStart<'static> {
146 BytesStart {
147 buf: Cow::Owned(self.buf.into_owned()),
148 name_len: self.name_len,
149 decoder: self.decoder,
150 }
151 }
152
153 /// Converts the event into an owned event without taking ownership of Event
154 pub fn to_owned(&self) -> BytesStart<'static> {
155 BytesStart {
156 buf: Cow::Owned(self.buf.clone().into_owned()),
157 name_len: self.name_len,
158 decoder: self.decoder,
159 }
160 }
161
162 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
163 ///
164 /// # Example
165 ///
166 /// ```
167 /// use quick_xml::events::{BytesStart, Event};
168 /// # use quick_xml::writer::Writer;
169 /// # use quick_xml::Error;
170 ///
171 /// struct SomeStruct<'a> {
172 /// attrs: BytesStart<'a>,
173 /// // ...
174 /// }
175 /// # impl<'a> SomeStruct<'a> {
176 /// # fn example(&self) -> Result<(), Error> {
177 /// # let mut writer = Writer::new(Vec::new());
178 ///
179 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
180 /// // ...
181 /// writer.write_event(Event::End(self.attrs.to_end()))?;
182 /// # Ok(())
183 /// # }}
184 /// ```
185 ///
186 /// [`to_end`]: Self::to_end
187 pub fn borrow(&self) -> BytesStart<'_> {
188 BytesStart {
189 buf: Cow::Borrowed(&self.buf),
190 name_len: self.name_len,
191 decoder: self.decoder,
192 }
193 }
194
195 /// Creates new paired close tag
196 #[inline]
197 pub fn to_end(&self) -> BytesEnd<'_> {
198 BytesEnd::from(self.name())
199 }
200
201 /// Get the decoder, used to decode bytes, read by the reader which produces
202 /// this event, to the strings.
203 ///
204 /// When event was created manually, encoding is UTF-8.
205 ///
206 /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
207 /// defaults to UTF-8.
208 ///
209 /// [`encoding`]: ../index.html#encoding
210 #[inline]
211 pub const fn decoder(&self) -> Decoder {
212 self.decoder
213 }
214
215 /// Gets the undecoded raw tag name, as present in the input stream.
216 #[inline]
217 pub fn name(&self) -> QName<'_> {
218 QName(&self.buf[..self.name_len])
219 }
220
221 /// Gets the undecoded raw local tag name (excluding namespace) as present
222 /// in the input stream.
223 ///
224 /// All content up to and including the first `:` character is removed from the tag name.
225 #[inline]
226 pub fn local_name(&self) -> LocalName<'_> {
227 self.name().into()
228 }
229
230 /// Edit the name of the BytesStart in-place
231 ///
232 /// # Warning
233 ///
234 /// `name` must be a valid name.
235 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
236 let bytes = self.buf.to_mut();
237 bytes.splice(..self.name_len, name.iter().cloned());
238 self.name_len = name.len();
239 self
240 }
241}
242
243/// Attribute-related methods
244impl<'a> BytesStart<'a> {
245 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
246 ///
247 /// The yielded items must be convertible to [`Attribute`] using `Into`.
248 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
249 where
250 I: IntoIterator,
251 I::Item: Into<Attribute<'b>>,
252 {
253 self.extend_attributes(attributes);
254 self
255 }
256
257 /// Add additional attributes to this tag using an iterator.
258 ///
259 /// The yielded items must be convertible to [`Attribute`] using `Into`.
260 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
261 where
262 I: IntoIterator,
263 I::Item: Into<Attribute<'b>>,
264 {
265 for attr in attributes {
266 self.push_attribute(attr);
267 }
268 self
269 }
270
271 /// Adds an attribute to this element.
272 pub fn push_attribute<'b, A>(&mut self, attr: A)
273 where
274 A: Into<Attribute<'b>>,
275 {
276 self.buf.to_mut().push(b' ');
277 self.push_attr(attr.into());
278 }
279
280 /// Remove all attributes from the ByteStart
281 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
282 self.buf.to_mut().truncate(self.name_len);
283 self
284 }
285
286 /// Returns an iterator over the attributes of this tag.
287 pub fn attributes(&self) -> Attributes<'_> {
288 Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
289 }
290
291 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
292 pub fn html_attributes(&self) -> Attributes<'_> {
293 Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
294 }
295
296 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
297 /// including the whitespace after the tag name if there is any.
298 #[inline]
299 pub fn attributes_raw(&self) -> &[u8] {
300 &self.buf[self.name_len..]
301 }
302
303 /// Try to get an attribute
304 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
305 &'a self,
306 attr_name: N,
307 ) -> Result<Option<Attribute<'a>>, AttrError> {
308 for a in self.attributes().with_checks(false) {
309 let a = a?;
310 if a.key.as_ref() == attr_name.as_ref() {
311 return Ok(Some(a));
312 }
313 }
314 Ok(None)
315 }
316
317 /// Adds an attribute to this element.
318 pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
319 let bytes = self.buf.to_mut();
320 bytes.extend_from_slice(attr.key.as_ref());
321 bytes.extend_from_slice(b"=\"");
322 // FIXME: need to escape attribute content
323 bytes.extend_from_slice(attr.value.as_ref());
324 bytes.push(b'"');
325 }
326
327 /// Adds new line in existing element
328 pub(crate) fn push_newline(&mut self) {
329 self.buf.to_mut().push(b'\n');
330 }
331
332 /// Adds indentation bytes in existing element
333 pub(crate) fn push_indent(&mut self, indent: &[u8]) {
334 self.buf.to_mut().extend_from_slice(indent);
335 }
336}
337
338impl<'a> Debug for BytesStart<'a> {
339 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
340 write!(f, "BytesStart {{ buf: ")?;
341 write_cow_string(f, &self.buf)?;
342 write!(f, ", name_len: {} }}", self.name_len)
343 }
344}
345
346impl<'a> Deref for BytesStart<'a> {
347 type Target = [u8];
348
349 fn deref(&self) -> &[u8] {
350 &self.buf
351 }
352}
353
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a tag whose name is a non-empty alphanumeric string, followed
    /// by arbitrary `(&str, &str)` attributes. Any other name is rejected with
    /// `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let acceptable = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !acceptable {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut start = Self::new(name);
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        start.extend_attributes(attrs);
        Ok(start)
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
370
371////////////////////////////////////////////////////////////////////////////////////////////////////
372
373/// Closing tag data (`Event::End`): `</name>`.
374///
375/// The name can be accessed using the [`name`] or [`local_name`] methods.
376///
377/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
378/// returns the content of this event between `</` and `>`.
379///
380/// Note, that inner text will not contain `>` character inside:
381///
382/// ```
383/// # use quick_xml::events::{BytesEnd, Event};
384/// # use quick_xml::reader::Reader;
385/// # use pretty_assertions::assert_eq;
386/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
387/// // Note, that this entire string considered as a .name()
388/// let content = "element a1 = 'val1' a2=\"val2\" ";
389/// let event = BytesEnd::new(content);
390///
391/// reader.config_mut().trim_markup_names_in_closing_tags = false;
392/// reader.config_mut().check_end_names = false;
393/// reader.read_event().unwrap(); // Skip `<element>`
394///
395/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
396/// assert_eq!(event.name().as_ref(), content.as_bytes());
397/// // deref coercion of &BytesEnd to &[u8]
398/// assert_eq!(&event as &[u8], content.as_bytes());
399/// // AsRef<[u8]> for &T + deref coercion
400/// assert_eq!(event.as_ref(), content.as_bytes());
401/// ```
402///
403/// [`name`]: Self::name
404/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded bytes of the tag name; this is what `name()` and
    /// `deref()` expose
    name: Cow<'a, [u8]>,
}
409
410impl<'a> BytesEnd<'a> {
411 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
412 #[inline]
413 pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
414 BytesEnd { name }
415 }
416
417 /// Creates a new `BytesEnd` borrowing a slice.
418 ///
419 /// # Warning
420 ///
421 /// `name` must be a valid name.
422 #[inline]
423 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
424 Self::wrap(str_cow_to_bytes(name))
425 }
426
427 /// Converts the event into an owned event.
428 pub fn into_owned(self) -> BytesEnd<'static> {
429 BytesEnd {
430 name: Cow::Owned(self.name.into_owned()),
431 }
432 }
433
434 /// Converts the event into a borrowed event.
435 #[inline]
436 pub fn borrow(&self) -> BytesEnd<'_> {
437 BytesEnd {
438 name: Cow::Borrowed(&self.name),
439 }
440 }
441
442 /// Gets the undecoded raw tag name, as present in the input stream.
443 #[inline]
444 pub fn name(&self) -> QName<'_> {
445 QName(&self.name)
446 }
447
448 /// Gets the undecoded raw local tag name (excluding namespace) as present
449 /// in the input stream.
450 ///
451 /// All content up to and including the first `:` character is removed from the tag name.
452 #[inline]
453 pub fn local_name(&self) -> LocalName<'_> {
454 self.name().into()
455 }
456}
457
458impl<'a> Debug for BytesEnd<'a> {
459 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
460 write!(f, "BytesEnd {{ name: ")?;
461 write_cow_string(f, &self.name)?;
462 write!(f, " }}")
463 }
464}
465
466impl<'a> Deref for BytesEnd<'a> {
467 type Target = [u8];
468
469 fn deref(&self) -> &[u8] {
470 &self.name
471 }
472}
473
474impl<'a> From<QName<'a>> for BytesEnd<'a> {
475 #[inline]
476 fn from(name: QName<'a>) -> Self {
477 Self::wrap(name.into_inner().into())
478 }
479}
480
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag from an arbitrary string; the name is not validated.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
490
491////////////////////////////////////////////////////////////////////////////////////////////////////
492
493/// Data from various events (most notably, `Event::Text`).
494///
495/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
496/// returns the content of this event. In case of comment this is everything
497/// between `<!--` and `-->` and the text of comment may not contain `-->` inside
498/// (if [`Config::check_comments`] is set to `true`).
499/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
500/// (i.e. in case of DTD the first character is never space):
501///
502/// ```
503/// # use quick_xml::events::{BytesText, Event};
504/// # use quick_xml::reader::Reader;
505/// # use pretty_assertions::assert_eq;
506/// // Remember, that \ at the end of string literal strips
507/// // all space characters to the first non-space character
508/// let mut reader = Reader::from_str("\
509/// <!DOCTYPE comment or text >\
510/// comment or text \
511/// <!--comment or text -->"
512/// );
513/// let content = "comment or text ";
514/// let event = BytesText::new(content);
515///
516/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
517/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
518/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
519/// // deref coercion of &BytesText to &[u8]
520/// assert_eq!(&event as &[u8], content.as_bytes());
521/// // AsRef<[u8]> for &T + deref coercion
522/// assert_eq!(event.as_ref(), content.as_bytes());
523/// ```
524///
525/// [`Config::check_comments`]: crate::reader::Config::check_comments
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped, then encoded, content of the event. The content is in the XML
    /// document encoding when the event comes from the reader, and should be
    /// in the document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
535
536impl<'a> BytesText<'a> {
537 /// Creates a new `BytesText` from a raw byte sequence as it appeared in th XML
538 /// source in the specified encoding.
539 #[inline]
540 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
541 Self {
542 content: content.into(),
543 decoder,
544 }
545 }
546
547 /// Creates a new `BytesText` from a raw string as it appeared in the XML source.
548 ///
549 /// # Warning
550 ///
551 /// `content` is not checked to not contain markup or entity references. Be warned
552 /// that writing such event may result to invalid XML if your content contains not
553 /// defined entity references or invalid XML markup.
554 ///
555 /// `content` may have any EOLs, they will be normalized when using [`xml_content()`] getters.
556 ///
557 /// [`xml_content()`]: Self::xml_content
558 #[inline]
559 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
560 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
561 }
562
563 /// Creates a new `BytesText` from a string.
564 ///
565 /// # Warning
566 ///
567 /// `content` will be escaped using the [`escape`] function, but that may change
568 /// in the future, because events produced by the reader never contains `&` or `<`,
569 /// and escaping of `>`, `"` and `'` is not required. If you want to preserve exact
570 /// content, use [`from_escaped()`] method, but be warned that writing such event
571 /// may result to invalid XML if your content contains not defined entity references
572 /// or invalid XML markup.
573 ///
574 /// `content` may have any EOLs, they will be normalized when using [`xml_content()`] getters.
575 ///
576 /// [`escape`]: crate::escape::escape
577 /// [`from_escaped()`]: Self::from_escaped
578 /// [`xml_content()`]: Self::xml_content
579 #[inline]
580 pub fn new(content: &'a str) -> Self {
581 Self::from_escaped(escape(content))
582 }
583
584 /// Ensures that all data is owned to extend the object's lifetime if
585 /// necessary.
586 #[inline]
587 pub fn into_owned(self) -> BytesText<'static> {
588 BytesText {
589 content: self.content.into_owned().into(),
590 decoder: self.decoder,
591 }
592 }
593
594 /// Extracts the inner `Cow` from the `BytesText` event container.
595 #[inline]
596 pub fn into_inner(self) -> Cow<'a, [u8]> {
597 self.content
598 }
599
600 /// Converts the event into a borrowed event.
601 #[inline]
602 pub fn borrow(&self) -> BytesText<'_> {
603 BytesText {
604 content: Cow::Borrowed(&self.content),
605 decoder: self.decoder,
606 }
607 }
608
609 /// Decodes the content of the event.
610 ///
611 /// This will allocate if the value is encoded in non-UTF-8 encoding.
612 ///
613 /// This method does not normalizes end-of-line characters as required by [specification].
614 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
615 ///
616 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
617 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
618 self.decoder.decode_cow(&self.content)
619 }
620
621 /// Decodes the content of the XML 1.0 or HTML event.
622 ///
623 /// When this event produced by the reader, it uses the encoding information
624 /// associated with that reader to interpret the raw bytes contained within
625 /// this text event.
626 ///
627 /// This will allocate if the value is encoded in non-UTF-8 encoding, or EOL normalization is required.
628 ///
629 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
630 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
631 ///
632 /// This method also can be used to get HTML content, because rules the same.
633 ///
634 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
635 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
636 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
637 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
638 self.decoder.content(&self.content, normalize_xml10_eols)
639 }
640
641 /// Decodes the content of the XML 1.1 event.
642 ///
643 /// When this event produced by the reader, it uses the encoding information
644 /// associated with that reader to interpret the raw bytes contained within
645 /// this text event.
646 ///
647 /// This will allocate if the value is encoded in non-UTF-8 encoding, or EOL normalization is required.
648 ///
649 /// Note, that this method should be used only if event represents XML 1.1 content,
650 /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
651 ///
652 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
653 ///
654 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
655 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
656 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
657 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
658 self.decoder.content(&self.content, normalize_xml11_eols)
659 }
660
661 /// Decodes the content of the XML event according to the specified version.
662 ///
663 /// When this event produced by the reader, it uses the encoding information
664 /// associated with that reader to interpret the raw bytes contained within
665 /// this text event.
666 ///
667 /// This will allocate if the value is encoded in non-UTF-8 encoding, or EOL normalization
668 /// is required.
669 #[inline]
670 pub fn xml_content(&self, version: XmlVersion) -> Result<Cow<'a, str>, EncodingError> {
671 match version {
672 XmlVersion::Explicit1_1 => self.xml11_content(),
673 _ => self.xml10_content(),
674 }
675 }
676
677 /// Alias for [`xml10_content()`](Self::xml10_content).
678 #[inline]
679 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
680 self.xml10_content()
681 }
682
683 /// Removes leading XML whitespace bytes from text content.
684 ///
685 /// Returns `true` if content is empty after that
686 pub fn inplace_trim_start(&mut self) -> bool {
687 self.content = trim_cow(
688 replace(&mut self.content, Cow::Borrowed(b"")),
689 trim_xml_start,
690 );
691 self.content.is_empty()
692 }
693
694 /// Removes trailing XML whitespace bytes from text content.
695 ///
696 /// Returns `true` if content is empty after that
697 pub fn inplace_trim_end(&mut self) -> bool {
698 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
699 self.content.is_empty()
700 }
701}
702
703impl<'a> Debug for BytesText<'a> {
704 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
705 write!(f, "BytesText {{ content: ")?;
706 write_cow_string(f, &self.content)?;
707 write!(f, " }}")
708 }
709}
710
711impl<'a> Deref for BytesText<'a> {
712 type Target = [u8];
713
714 fn deref(&self) -> &[u8] {
715 &self.content
716 }
717}
718
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary string made only of
    /// alphanumeric characters (the empty string is accepted). Any other
    /// string is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().all(char::is_alphanumeric) {
            Ok(Self::new(text))
        } else {
            Err(arbitrary::Error::IncorrectFormat)
        }
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
733
734////////////////////////////////////////////////////////////////////////////////////////////////////
735
736/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
737/// [convert](Self::escape) it to [`BytesText`].
738///
739/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
740/// returns the content of this event between `<![CDATA[` and `]]>`.
741///
742/// Note, that inner text will not contain `]]>` sequence inside:
743///
744/// ```
745/// # use quick_xml::events::{BytesCData, Event};
746/// # use quick_xml::reader::Reader;
747/// # use pretty_assertions::assert_eq;
748/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
749/// let content = " CDATA section ";
750/// let event = BytesCData::new(content);
751///
752/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
753/// // deref coercion of &BytesCData to &[u8]
754/// assert_eq!(&event as &[u8], content.as_bytes());
755/// // AsRef<[u8]> for &T + deref coercion
756/// assert_eq!(event.as_ref(), content.as_bytes());
757/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Raw bytes of the CDATA section, stored in the encoding described by `decoder`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
764
765impl<'a> BytesCData<'a> {
766 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
767 #[inline]
768 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
769 Self {
770 content: content.into(),
771 decoder,
772 }
773 }
774
775 /// Creates a new `BytesCData` from a string.
776 ///
777 /// # Warning
778 ///
779 /// `content` must not contain the `]]>` sequence. You can use
780 /// [`BytesCData::escaped`] to escape the content instead.
781 #[inline]
782 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
783 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
784 }
785
    /// Creates an iterator of `BytesCData` from a string.
    ///
    /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
    /// sections, splitting the `]]` and `>` characters, because the CDATA closing
    /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
    /// for each of those sections.
    ///
    /// A string without `]]>` (including the empty string) yields exactly one
    /// section, as the first example below shows.
    ///
    /// # Examples
    ///
    /// ```
    /// # use quick_xml::events::BytesCData;
    /// # use pretty_assertions::assert_eq;
    /// let content = "";
    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
    /// assert_eq!(cdata, &[BytesCData::new("")]);
    ///
    /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
    /// assert_eq!(cdata, &[
    ///     BytesCData::new("Certain tokens like ]]"),
    ///     BytesCData::new("> can be difficult and <invalid>"),
    /// ]);
    ///
    /// let content = "foo]]>bar]]>baz]]>quux";
    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
    /// assert_eq!(cdata, &[
    ///     BytesCData::new("foo]]"),
    ///     BytesCData::new(">bar]]"),
    ///     BytesCData::new(">baz]]"),
    ///     BytesCData::new(">quux"),
    /// ]);
    /// ```
    #[inline]
    pub const fn escaped(content: &'a str) -> CDataIterator<'a> {
        // Thin wrapper: the splitting itself is delegated to `utils::CDataIterator`
        CDataIterator {
            inner: utils::CDataIterator::new(content),
        }
    }
824
825 /// Ensures that all data is owned to extend the object's lifetime if
826 /// necessary.
827 #[inline]
828 pub fn into_owned(self) -> BytesCData<'static> {
829 BytesCData {
830 content: self.content.into_owned().into(),
831 decoder: self.decoder,
832 }
833 }
834
835 /// Extracts the inner `Cow` from the `BytesCData` event container.
836 #[inline]
837 pub fn into_inner(self) -> Cow<'a, [u8]> {
838 self.content
839 }
840
841 /// Converts the event into a borrowed event.
842 #[inline]
843 pub fn borrow(&self) -> BytesCData<'_> {
844 BytesCData {
845 content: Cow::Borrowed(&self.content),
846 decoder: self.decoder,
847 }
848 }
849
850 /// Converts this CDATA content to an escaped version, that can be written
851 /// as an usual text in XML.
852 ///
853 /// This function performs following replacements:
854 ///
855 /// | Character | Replacement
856 /// |-----------|------------
857 /// | `<` | `<`
858 /// | `>` | `>`
859 /// | `&` | `&`
860 /// | `'` | `'`
861 /// | `"` | `"`
862 pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
863 let decoded = self.decode()?;
864 Ok(BytesText::wrap(
865 match escape(decoded) {
866 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
867 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
868 },
869 Decoder::utf8(),
870 ))
871 }
872
873 /// Converts this CDATA content to an escaped version, that can be written
874 /// as an usual text in XML.
875 ///
876 /// In XML text content, it is allowed (though not recommended) to leave
877 /// the quote special characters `"` and `'` unescaped.
878 ///
879 /// This function performs following replacements:
880 ///
881 /// | Character | Replacement
882 /// |-----------|------------
883 /// | `<` | `<`
884 /// | `>` | `>`
885 /// | `&` | `&`
886 pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
887 let decoded = self.decode()?;
888 Ok(BytesText::wrap(
889 match partial_escape(decoded) {
890 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
891 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
892 },
893 Decoder::utf8(),
894 ))
895 }
896
897 /// Converts this CDATA content to an escaped version, that can be written
898 /// as an usual text in XML. This method escapes only those characters that
899 /// must be escaped according to the [specification].
900 ///
901 /// This function performs following replacements:
902 ///
903 /// | Character | Replacement
904 /// |-----------|------------
905 /// | `<` | `<`
906 /// | `&` | `&`
907 ///
908 /// [specification]: https://www.w3.org/TR/xml11/#syntax
909 pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
910 let decoded = self.decode()?;
911 Ok(BytesText::wrap(
912 match minimal_escape(decoded) {
913 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
914 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
915 },
916 Decoder::utf8(),
917 ))
918 }
919
920 /// Decodes the raw input byte content of the CDATA section into a string,
921 /// without performing XML entity escaping.
922 ///
923 /// When this event produced by the XML reader, it uses the encoding information
924 /// associated with that reader to interpret the raw bytes contained within this
925 /// CDATA event.
926 ///
927 /// This method does not normalizes end-of-line characters as required by [specification].
928 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
929 ///
930 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
931 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
932 self.decoder.decode_cow(&self.content)
933 }
934
935 /// Decodes the raw input byte content of the CDATA section of the XML 1.0 or
936 /// HTML event into a string.
937 ///
938 /// When this event produced by the reader, it uses the encoding information
939 /// associated with that reader to interpret the raw bytes contained within
940 /// this CDATA event.
941 ///
942 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
943 /// is required.
944 ///
945 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
946 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
947 ///
948 /// This method also can be used to get HTML content, because rules the same.
949 ///
950 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
951 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
952 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
953 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
954 self.decoder.content(&self.content, normalize_xml10_eols)
955 }
956
957 /// Decodes the raw input byte content of the CDATA section of the XML 1.1 event
958 /// into a string.
959 ///
960 /// When this event produced by the reader, it uses the encoding information
961 /// associated with that reader to interpret the raw bytes contained within
962 /// this CDATA event.
963 ///
964 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
965 /// is required.
966 ///
967 /// Note, that this method should be used only if event represents XML 1.1 content,
968 /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
969 ///
970 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
971 ///
972 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
973 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
974 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
975 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
976 self.decoder.content(&self.content, normalize_xml11_eols)
977 }
978
979 /// Decodes the raw input byte content of the CDATA section of the XML event
980 /// into a string according to the specified version.
981 ///
982 /// When this event produced by the reader, it uses the encoding information
983 /// associated with that reader to interpret the raw bytes contained within
984 /// this CDATA event.
985 ///
986 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
987 /// is required.
988 #[inline]
989 pub fn xml_content(&self, version: XmlVersion) -> Result<Cow<'a, str>, EncodingError> {
990 match version {
991 XmlVersion::Explicit1_1 => self.xml11_content(),
992 _ => self.xml10_content(),
993 }
994 }
995
996 /// Alias for [`xml10_content()`](Self::xml10_content).
997 #[inline]
998 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
999 self.xml10_content()
1000 }
1001}
1002
1003impl<'a> Debug for BytesCData<'a> {
1004 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1005 write!(f, "BytesCData {{ content: ")?;
1006 write_cow_string(f, &self.content)?;
1007 write!(f, " }}")
1008 }
1009}
1010
1011impl<'a> Deref for BytesCData<'a> {
1012 type Target = [u8];
1013
1014 fn deref(&self) -> &[u8] {
1015 &self.content
1016 }
1017}
1018
/// Builds an arbitrary CDATA event from an arbitrary string slice.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    // The event consumes exactly one `&str`, so its hint is reused as is.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1028
1029/// Iterator over `CDATA` sections in a string.
1030///
1031/// This iterator is created by the [`BytesCData::escaped`] method.
1032#[derive(Debug, Clone)]
1033pub struct CDataIterator<'a> {
1034 inner: utils::CDataIterator<'a>,
1035}
1036
1037impl<'a> Iterator for CDataIterator<'a> {
1038 type Item = BytesCData<'a>;
1039
1040 fn next(&mut self) -> Option<BytesCData<'a>> {
1041 self.inner
1042 .next()
1043 .map(|slice| BytesCData::wrap(slice.as_bytes(), Decoder::utf8()))
1044 }
1045}
1046
1047impl FusedIterator for CDataIterator<'_> {}
1048
1049////////////////////////////////////////////////////////////////////////////////////////////////////
1050
1051/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
1052///
1053/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1054/// returns the content of this event between `<?` and `?>`.
1055///
1056/// Note, that inner text will not contain `?>` sequence inside:
1057///
1058/// ```
1059/// # use quick_xml::events::{BytesPI, Event};
1060/// # use quick_xml::reader::Reader;
1061/// # use pretty_assertions::assert_eq;
1062/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
1063/// let content = "processing instruction >:-<~ ";
1064/// let event = BytesPI::new(content);
1065///
1066/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
1067/// // deref coercion of &BytesPI to &[u8]
1068/// assert_eq!(&event as &[u8], content.as_bytes());
1069/// // AsRef<[u8]> for &T + deref coercion
1070/// assert_eq!(event.as_ref(), content.as_bytes());
1071/// ```
1072///
1073/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
1074#[derive(Clone, Eq, PartialEq)]
1075pub struct BytesPI<'a> {
1076 content: BytesStart<'a>,
1077}
1078
1079impl<'a> BytesPI<'a> {
1080 /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
1081 #[inline]
1082 pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
1083 Self {
1084 content: BytesStart::wrap(content, target_len, decoder),
1085 }
1086 }
1087
1088 /// Creates a new `BytesPI` from a string.
1089 ///
1090 /// # Warning
1091 ///
1092 /// `content` must not contain the `?>` sequence.
1093 #[inline]
1094 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1095 let buf = str_cow_to_bytes(content);
1096 let name_len = name_len(&buf);
1097 Self {
1098 content: BytesStart {
1099 buf,
1100 name_len,
1101 decoder: Decoder::utf8(),
1102 },
1103 }
1104 }
1105
1106 /// Ensures that all data is owned to extend the object's lifetime if
1107 /// necessary.
1108 #[inline]
1109 pub fn into_owned(self) -> BytesPI<'static> {
1110 BytesPI {
1111 content: self.content.into_owned(),
1112 }
1113 }
1114
1115 /// Extracts the inner `Cow` from the `BytesPI` event container.
1116 #[inline]
1117 pub fn into_inner(self) -> Cow<'a, [u8]> {
1118 self.content.buf
1119 }
1120
1121 /// Converts the event into a borrowed event.
1122 #[inline]
1123 pub fn borrow(&self) -> BytesPI<'_> {
1124 BytesPI {
1125 content: self.content.borrow(),
1126 }
1127 }
1128
1129 /// A target used to identify the application to which the instruction is directed.
1130 ///
1131 /// # Example
1132 ///
1133 /// ```
1134 /// # use pretty_assertions::assert_eq;
1135 /// use quick_xml::events::BytesPI;
1136 ///
1137 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1138 /// assert_eq!(instruction.target(), b"xml-stylesheet");
1139 /// ```
1140 #[inline]
1141 pub fn target(&self) -> &[u8] {
1142 self.content.name().0
1143 }
1144
1145 /// Content of the processing instruction. Contains everything between target
1146 /// name and the end of the instruction. A direct consequence is that the first
1147 /// character is always a space character.
1148 ///
1149 /// # Example
1150 ///
1151 /// ```
1152 /// # use pretty_assertions::assert_eq;
1153 /// use quick_xml::events::BytesPI;
1154 ///
1155 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1156 /// assert_eq!(instruction.content(), br#" href="style.css""#);
1157 /// ```
1158 #[inline]
1159 pub fn content(&self) -> &[u8] {
1160 self.content.attributes_raw()
1161 }
1162
1163 /// A view of the processing instructions' content as a list of key-value pairs.
1164 ///
1165 /// Key-value pairs are used in some processing instructions, for example in
1166 /// `<?xml-stylesheet?>`.
1167 ///
1168 /// Returned iterator does not validate attribute values as may required by
1169 /// target's rules. For example, it doesn't check that substring `?>` is not
1170 /// present in the attribute value. That shouldn't be the problem when event
1171 /// is produced by the reader, because reader detects end of processing instruction
1172 /// by the first `?>` sequence, as required by the specification, and therefore
1173 /// this sequence cannot appear inside it.
1174 ///
1175 /// # Example
1176 ///
1177 /// ```
1178 /// # use pretty_assertions::assert_eq;
1179 /// use std::borrow::Cow;
1180 /// use quick_xml::events::attributes::Attribute;
1181 /// use quick_xml::events::BytesPI;
1182 /// use quick_xml::name::QName;
1183 ///
1184 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1185 /// for attr in instruction.attributes() {
1186 /// assert_eq!(attr, Ok(Attribute {
1187 /// key: QName(b"href"),
1188 /// value: Cow::Borrowed(b"style.css"),
1189 /// }));
1190 /// }
1191 /// ```
1192 #[inline]
1193 pub fn attributes(&self) -> Attributes<'_> {
1194 self.content.attributes()
1195 }
1196}
1197
1198impl<'a> Debug for BytesPI<'a> {
1199 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1200 write!(f, "BytesPI {{ content: ")?;
1201 write_cow_string(f, &self.content.buf)?;
1202 write!(f, " }}")
1203 }
1204}
1205
1206impl<'a> Deref for BytesPI<'a> {
1207 type Target = [u8];
1208
1209 fn deref(&self) -> &[u8] {
1210 &self.content
1211 }
1212}
1213
/// Builds an arbitrary processing instruction from an arbitrary string slice.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    // The event consumes exactly one `&str`, so its hint is reused as is.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1223
1224////////////////////////////////////////////////////////////////////////////////////////////////////
1225
1226/// An XML declaration (`Event::Decl`).
1227///
1228/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
1229///
1230/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1231/// returns the content of this event between `<?` and `?>`.
1232///
1233/// Note, that inner text will not contain `?>` sequence inside:
1234///
1235/// ```
1236/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
1237/// # use quick_xml::reader::Reader;
1238/// # use pretty_assertions::assert_eq;
1239/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
1240/// let content = "xml version = '1.0' ";
1241/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
1242///
1243/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
1244/// // deref coercion of &BytesDecl to &[u8]
1245/// assert_eq!(&event as &[u8], content.as_bytes());
1246/// // AsRef<[u8]> for &T + deref coercion
1247/// assert_eq!(event.as_ref(), content.as_bytes());
1248/// ```
1249#[derive(Clone, Debug, Eq, PartialEq)]
1250pub struct BytesDecl<'a> {
1251 content: BytesStart<'a>,
1252}
1253
1254impl<'a> BytesDecl<'a> {
1255 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1256 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1257 /// attribute.
1258 ///
1259 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1260 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1261 /// the double quote character is not allowed in any of the attribute values.
1262 pub fn new(
1263 version: &str,
1264 encoding: Option<&str>,
1265 standalone: Option<&str>,
1266 ) -> BytesDecl<'static> {
1267 // Compute length of the buffer based on supplied attributes
1268 // ' encoding=""' => 12
1269 let encoding_attr_len = if let Some(xs) = encoding {
1270 12 + xs.len()
1271 } else {
1272 0
1273 };
1274 // ' standalone=""' => 14
1275 let standalone_attr_len = if let Some(xs) = standalone {
1276 14 + xs.len()
1277 } else {
1278 0
1279 };
1280 // 'xml version=""' => 14
1281 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1282
1283 buf.push_str("xml version=\"");
1284 buf.push_str(version);
1285
1286 if let Some(encoding_val) = encoding {
1287 buf.push_str("\" encoding=\"");
1288 buf.push_str(encoding_val);
1289 }
1290
1291 if let Some(standalone_val) = standalone {
1292 buf.push_str("\" standalone=\"");
1293 buf.push_str(standalone_val);
1294 }
1295 buf.push('"');
1296
1297 BytesDecl {
1298 content: BytesStart::from_content(buf, 3),
1299 }
1300 }
1301
1302 /// Creates a `BytesDecl` from a `BytesStart`
1303 pub const fn from_start(start: BytesStart<'a>) -> Self {
1304 Self { content: start }
1305 }
1306
1307 /// Gets xml version, excluding quotes (`'` or `"`).
1308 ///
1309 /// According to the [grammar], the version *must* be the first thing in the declaration.
1310 /// This method tries to extract the first thing in the declaration and return it.
1311 /// In case of multiple attributes value of the first one is returned.
1312 ///
1313 /// If version is missed in the declaration, or the first thing is not a version,
1314 /// [`IllFormedError::MissingDeclVersion`] will be returned.
1315 ///
1316 /// # Examples
1317 ///
1318 /// ```
1319 /// use quick_xml::errors::{Error, IllFormedError};
1320 /// use quick_xml::events::{BytesDecl, BytesStart};
1321 ///
1322 /// // <?xml version='1.1'?>
1323 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1324 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1325 ///
1326 /// // <?xml version='1.0' version='1.1'?>
1327 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1328 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1329 ///
1330 /// // <?xml encoding='utf-8'?>
1331 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1332 /// match decl.version() {
1333 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1334 /// _ => assert!(false),
1335 /// }
1336 ///
1337 /// // <?xml encoding='utf-8' version='1.1'?>
1338 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1339 /// match decl.version() {
1340 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1341 /// _ => assert!(false),
1342 /// }
1343 ///
1344 /// // <?xml?>
1345 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1346 /// match decl.version() {
1347 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1348 /// _ => assert!(false),
1349 /// }
1350 /// ```
1351 ///
1352 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1353 pub fn version(&self) -> Result<Cow<'_, [u8]>, Error> {
1354 // The version *must* be the first thing in the declaration.
1355 match self.content.attributes().with_checks(false).next() {
1356 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1357 // first attribute was not "version"
1358 Some(Ok(a)) => {
1359 let found = from_utf8(a.key.as_ref())
1360 .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1361 .to_string();
1362 Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1363 found,
1364 ))))
1365 }
1366 // error parsing attributes
1367 Some(Err(e)) => Err(e.into()),
1368 // no attributes
1369 None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1370 }
1371 }
1372
1373 /// Gets xml encoding, excluding quotes (`'` or `"`).
1374 ///
1375 /// Although according to the [grammar] encoding must appear before `"standalone"`
1376 /// and after `"version"`, this method does not check that. The first occurrence
1377 /// of the attribute will be returned even if there are several. Also, method does
1378 /// not restrict symbols that can forming the encoding, so the returned encoding
1379 /// name may not correspond to the grammar.
1380 ///
1381 /// # Examples
1382 ///
1383 /// ```
1384 /// use std::borrow::Cow;
1385 /// use quick_xml::Error;
1386 /// use quick_xml::events::{BytesDecl, BytesStart};
1387 ///
1388 /// // <?xml version='1.1'?>
1389 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1390 /// assert!(decl.encoding().is_none());
1391 ///
1392 /// // <?xml encoding='utf-8'?>
1393 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1394 /// match decl.encoding() {
1395 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1396 /// _ => assert!(false),
1397 /// }
1398 ///
1399 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1400 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1401 /// match decl.encoding() {
1402 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1403 /// _ => assert!(false),
1404 /// }
1405 /// ```
1406 ///
1407 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1408 pub fn encoding(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1409 self.content
1410 .try_get_attribute("encoding")
1411 .map(|a| a.map(|a| a.value))
1412 .transpose()
1413 }
1414
1415 /// Gets xml standalone, excluding quotes (`'` or `"`).
1416 ///
1417 /// Although according to the [grammar] standalone flag must appear after `"version"`
1418 /// and `"encoding"`, this method does not check that. The first occurrence of the
1419 /// attribute will be returned even if there are several. Also, method does not
1420 /// restrict symbols that can forming the value, so the returned flag name may not
1421 /// correspond to the grammar.
1422 ///
1423 /// # Examples
1424 ///
1425 /// ```
1426 /// use std::borrow::Cow;
1427 /// use quick_xml::Error;
1428 /// use quick_xml::events::{BytesDecl, BytesStart};
1429 ///
1430 /// // <?xml version='1.1'?>
1431 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1432 /// assert!(decl.standalone().is_none());
1433 ///
1434 /// // <?xml standalone='yes'?>
1435 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1436 /// match decl.standalone() {
1437 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1438 /// _ => assert!(false),
1439 /// }
1440 ///
1441 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1442 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1443 /// match decl.standalone() {
1444 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1445 /// _ => assert!(false),
1446 /// }
1447 /// ```
1448 ///
1449 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1450 pub fn standalone(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1451 self.content
1452 .try_get_attribute("standalone")
1453 .map(|a| a.map(|a| a.value))
1454 .transpose()
1455 }
1456
1457 /// Gets XML version as typified enumeration.
1458 ///
1459 /// According to the [grammar], the version *must* be the first thing in the declaration.
1460 /// This method tries to extract the first thing in the declaration and return it.
1461 /// In case of multiple attributes value of the first one is returned.
1462 ///
1463 /// If version is missed in the declaration, or the first thing is not a version,
1464 /// [`IllFormedError::MissingDeclVersion`] will be returned.
1465 ///
1466 /// If version is not 1.0 or 1.1, [`IllFormedError::UnknownVersion`] will be returned.
1467 ///
1468 /// # Examples
1469 ///
1470 /// ```
1471 /// use quick_xml::XmlVersion;
1472 /// use quick_xml::errors::{Error, IllFormedError};
1473 /// use quick_xml::events::{BytesDecl, BytesStart};
1474 ///
1475 /// // <?xml version='1.1'?>
1476 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1477 /// assert_eq!(decl.xml_version().unwrap(), XmlVersion::Explicit1_1);
1478 ///
1479 /// // <?xml version='1.0' version='1.1'?>
1480 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1481 /// assert_eq!(decl.xml_version().unwrap(), XmlVersion::Explicit1_0);
1482 ///
1483 /// // <?xml version='1.2'?>
1484 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.2'", 0));
1485 /// match decl.xml_version() {
1486 /// Err(Error::IllFormed(IllFormedError::UnknownVersion)) => {},
1487 /// _ => assert!(false),
1488 /// }
1489 ///
1490 /// // <?xml encoding='utf-8'?>
1491 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1492 /// match decl.xml_version() {
1493 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1494 /// _ => assert!(false),
1495 /// }
1496 ///
1497 /// // <?xml encoding='utf-8' version='1.1'?>
1498 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1499 /// match decl.xml_version() {
1500 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1501 /// _ => assert!(false),
1502 /// }
1503 ///
1504 /// // <?xml?>
1505 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1506 /// match decl.xml_version() {
1507 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1508 /// _ => assert!(false),
1509 /// }
1510 /// ```
1511 ///
1512 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1513 pub fn xml_version(&self) -> Result<XmlVersion, Error> {
1514 let v = self.version()?;
1515 match v.as_ref() {
1516 b"1.0" => Ok(XmlVersion::Explicit1_0),
1517 b"1.1" => Ok(XmlVersion::Explicit1_1),
1518 _ => Err(Error::IllFormed(IllFormedError::UnknownVersion)),
1519 }
1520 }
1521
1522 /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1523 /// algorithm.
1524 ///
1525 /// If encoding in not known, or `encoding` key was not found, returns `None`.
1526 /// In case of duplicated `encoding` key, encoding, corresponding to the first
1527 /// one, is returned.
1528 #[cfg(feature = "encoding")]
1529 pub fn encoder(&self) -> Option<&'static Encoding> {
1530 self.encoding()
1531 .and_then(|e| e.ok())
1532 .and_then(|e| Encoding::for_label(&e))
1533 }
1534
1535 /// Converts the event into an owned event.
1536 pub fn into_owned(self) -> BytesDecl<'static> {
1537 BytesDecl {
1538 content: self.content.into_owned(),
1539 }
1540 }
1541
1542 /// Converts the event into a borrowed event.
1543 #[inline]
1544 pub fn borrow(&self) -> BytesDecl<'_> {
1545 BytesDecl {
1546 content: self.content.borrow(),
1547 }
1548 }
1549}
1550
1551impl<'a> Deref for BytesDecl<'a> {
1552 type Target = [u8];
1553
1554 fn deref(&self) -> &[u8] {
1555 &self.content
1556 }
1557}
1558
/// Builds an arbitrary XML declaration from an arbitrary version string and
/// arbitrary optional encoding and standalone values.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        let encoding = <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?;
        let standalone = <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    // The hint has to account for every value consumed by `arbitrary()`:
    // the version and both optional attributes, not the version alone.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        arbitrary::size_hint::and_all(&[
            <&str as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
        ])
    }
}
1573
1574////////////////////////////////////////////////////////////////////////////////////////////////////
1575
1576/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
1577///
1578/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1579/// returns the content of this event between `&` and `;`:
1580///
1581/// ```
1582/// # use quick_xml::events::{BytesRef, Event};
1583/// # use quick_xml::reader::Reader;
1584/// # use pretty_assertions::assert_eq;
1585/// let mut reader = Reader::from_str(r#"&entity;"#);
1586/// let content = "entity";
1587/// let event = BytesRef::new(content);
1588///
1589/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
1590/// // deref coercion of &BytesRef to &[u8]
1591/// assert_eq!(&event as &[u8], content.as_bytes());
1592/// // AsRef<[u8]> for &T + deref coercion
1593/// assert_eq!(event.as_ref(), content.as_bytes());
1594/// ```
1595#[derive(Clone, Eq, PartialEq)]
1596pub struct BytesRef<'a> {
1597 content: Cow<'a, [u8]>,
1598 /// Encoding in which the `content` is stored inside the event.
1599 decoder: Decoder,
1600}
1601
1602impl<'a> BytesRef<'a> {
1603 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
1604 #[inline]
1605 pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
1606 Self {
1607 content: Cow::Borrowed(content),
1608 decoder,
1609 }
1610 }
1611
1612 /// Creates a new `BytesRef` borrowing a slice.
1613 ///
1614 /// # Warning
1615 ///
1616 /// `name` must be a valid name.
1617 #[inline]
1618 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
1619 Self {
1620 content: str_cow_to_bytes(name),
1621 decoder: Decoder::utf8(),
1622 }
1623 }
1624
1625 /// Converts the event into an owned event.
1626 pub fn into_owned(self) -> BytesRef<'static> {
1627 BytesRef {
1628 content: Cow::Owned(self.content.into_owned()),
1629 decoder: self.decoder,
1630 }
1631 }
1632
1633 /// Extracts the inner `Cow` from the `BytesRef` event container.
1634 #[inline]
1635 pub fn into_inner(self) -> Cow<'a, [u8]> {
1636 self.content
1637 }
1638
1639 /// Converts the event into a borrowed event.
1640 #[inline]
1641 pub fn borrow(&self) -> BytesRef<'_> {
1642 BytesRef {
1643 content: Cow::Borrowed(&self.content),
1644 decoder: self.decoder,
1645 }
1646 }
1647
1648 /// Decodes the content of the event.
1649 ///
1650 /// This will allocate if the value is encoded in non-UTF-8 encoding.
1651 ///
1652 /// This method does not normalizes end-of-line characters as required by [specification].
1653 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
1654 ///
1655 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
1656 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
1657 self.decoder.decode_cow(&self.content)
1658 }
1659
1660 /// Decodes the content of the XML 1.0 or HTML event.
1661 ///
1662 /// When this event produced by the reader, it uses the encoding information
1663 /// associated with that reader to interpret the raw bytes contained within
1664 /// this general reference event.
1665 ///
1666 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1667 /// is required.
1668 ///
1669 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
1670 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1671 ///
1672 /// This method also can be used to get HTML content, because rules the same.
1673 ///
1674 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1675 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1676 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1677 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1678 self.decoder.content(&self.content, normalize_xml10_eols)
1679 }
1680
1681 /// Decodes the content of the XML 1.1 event.
1682 ///
1683 /// When this event produced by the reader, it uses the encoding information
1684 /// associated with that reader to interpret the raw bytes contained within
1685 /// this general reference event.
1686 ///
1687 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1688 /// is required.
1689 ///
1690 /// Note, that this method should be used only if event represents XML 1.1 content,
1691 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1692 ///
1693 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
1694 ///
1695 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1696 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1697 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1698 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1699 self.decoder.content(&self.content, normalize_xml11_eols)
1700 }
1701
1702 /// Decodes the content of the XML event according to the specified version.
1703 ///
1704 /// When this event produced by the reader, it uses the encoding information
1705 /// associated with that reader to interpret the raw bytes contained within
1706 /// this general reference event.
1707 ///
1708 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1709 /// is required.
1710 #[inline]
1711 pub fn xml_content(&self, version: XmlVersion) -> Result<Cow<'a, str>, EncodingError> {
1712 match version {
1713 XmlVersion::Explicit1_1 => self.xml11_content(),
1714 _ => self.xml10_content(),
1715 }
1716 }
1717
1718 /// Alias for [`xml10_content()`](Self::xml10_content).
1719 #[inline]
1720 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1721 self.xml10_content()
1722 }
1723
1724 /// Returns `true` if the specified reference represents the character reference
1725 /// (`&#<number>;`).
1726 ///
1727 /// ```
1728 /// # use quick_xml::events::BytesRef;
1729 /// # use pretty_assertions::assert_eq;
1730 /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1731 /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1732 /// assert_eq!(BytesRef::new("lt" ).is_char_ref(), false);
1733 /// ```
1734 pub fn is_char_ref(&self) -> bool {
1735 matches!(self.content.first(), Some(b'#'))
1736 }
1737
1738 /// If this reference represents character reference, then resolves it and
1739 /// returns the character, otherwise returns `None`.
1740 ///
1741 /// This method does not check if character is allowed for XML, in other words,
1742 /// well-formedness constraint [WFC: Legal Char] is not enforced.
1743 /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1744 ///
1745 /// ```
1746 /// # use quick_xml::events::BytesRef;
1747 /// # use pretty_assertions::assert_eq;
1748 /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1749 /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1750 /// assert_eq!(BytesRef::new("lt" ).resolve_char_ref().unwrap(), None);
1751 /// ```
1752 ///
1753 /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1754 pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1755 if let Some(num) = self.decode()?.strip_prefix('#') {
1756 let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1757 return Ok(Some(ch));
1758 }
1759 Ok(None)
1760 }
1761}
1762
1763impl<'a> Debug for BytesRef<'a> {
1764 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1765 write!(f, "BytesRef {{ content: ")?;
1766 write_cow_string(f, &self.content)?;
1767 write!(f, " }}")
1768 }
1769}
1770
1771impl<'a> Deref for BytesRef<'a> {
1772 type Target = [u8];
1773
1774 fn deref(&self) -> &[u8] {
1775 &self.content
1776 }
1777}
1778
/// Builds an arbitrary reference from an arbitrary string slice.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    // The event consumes exactly one `&str`, so its hint is reused as is.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1789
1790////////////////////////////////////////////////////////////////////////////////////////////////////
1791
/// Event emitted by [`Reader::read_event_into`].
///
/// Every variant except `Eof` wraps a payload type that carries the data of
/// one piece of the XML document. An `Event` dereferences to a byte slice:
/// the bytes of the wrapped payload, or an empty slice for `Eof`.
///
/// Use `into_owned` to obtain an `Event<'static>`, or `borrow` to obtain an
/// event that borrows from an existing one.
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    ///
    /// Carries the same payload type as [`Event::Start`].
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    ///
    /// Carries the same payload type as [`Event::Text`].
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesPI<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    ///
    /// Carries the same payload type as [`Event::Text`].
    DocType(BytesText<'a>),
    /// General reference `&entity;` in the textual data. Can be either an entity
    /// reference, or a character reference.
    GeneralRef(BytesRef<'a>),
    /// End of XML document.
    ///
    /// This variant carries no data.
    Eof,
}
1822
1823impl<'a> Event<'a> {
1824 /// Converts the event to an owned version, untied to the lifetime of
1825 /// buffer used when reading but incurring a new, separate allocation.
1826 pub fn into_owned(self) -> Event<'static> {
1827 match self {
1828 Event::Start(e) => Event::Start(e.into_owned()),
1829 Event::End(e) => Event::End(e.into_owned()),
1830 Event::Empty(e) => Event::Empty(e.into_owned()),
1831 Event::Text(e) => Event::Text(e.into_owned()),
1832 Event::Comment(e) => Event::Comment(e.into_owned()),
1833 Event::CData(e) => Event::CData(e.into_owned()),
1834 Event::Decl(e) => Event::Decl(e.into_owned()),
1835 Event::PI(e) => Event::PI(e.into_owned()),
1836 Event::DocType(e) => Event::DocType(e.into_owned()),
1837 Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1838 Event::Eof => Event::Eof,
1839 }
1840 }
1841
1842 /// Converts the event into a borrowed event.
1843 #[inline]
1844 pub fn borrow(&self) -> Event<'_> {
1845 match self {
1846 Event::Start(e) => Event::Start(e.borrow()),
1847 Event::End(e) => Event::End(e.borrow()),
1848 Event::Empty(e) => Event::Empty(e.borrow()),
1849 Event::Text(e) => Event::Text(e.borrow()),
1850 Event::Comment(e) => Event::Comment(e.borrow()),
1851 Event::CData(e) => Event::CData(e.borrow()),
1852 Event::Decl(e) => Event::Decl(e.borrow()),
1853 Event::PI(e) => Event::PI(e.borrow()),
1854 Event::DocType(e) => Event::DocType(e.borrow()),
1855 Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1856 Event::Eof => Event::Eof,
1857 }
1858 }
1859}
1860
1861impl<'a> Deref for Event<'a> {
1862 type Target = [u8];
1863
1864 fn deref(&self) -> &[u8] {
1865 match *self {
1866 Event::Start(ref e) | Event::Empty(ref e) => e,
1867 Event::End(ref e) => e,
1868 Event::Text(ref e) => e,
1869 Event::Decl(ref e) => e,
1870 Event::PI(ref e) => e,
1871 Event::CData(ref e) => e,
1872 Event::Comment(ref e) => e,
1873 Event::DocType(ref e) => e,
1874 Event::GeneralRef(ref e) => e,
1875 Event::Eof => &[],
1876 }
1877 }
1878}
1879
/// Identity conversion: an `Event` viewed as an `Event` is the same
/// reference. This lets code that is generic over `AsRef<Event<'a>>`
/// accept an event directly.
impl<'a> AsRef<Event<'a>> for Event<'a> {
    fn as_ref(&self) -> &Event<'a> {
        // Nothing to convert; hand back the receiver unchanged.
        self
    }
}
1885
1886////////////////////////////////////////////////////////////////////////////////////////////////////
1887
/// Converts string-like content into a copy-on-write byte slice.
///
/// Ownership is preserved: a borrowed string yields a borrowed view of its
/// bytes, and an owned `String` is turned into its byte vector via
/// `String::into_bytes`.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1895
/// Applies `trim` to the bytes held by `value`, keeping the kind of `Cow`.
///
/// - A borrowed value stays borrowed and simply points at the slice that
///   `trim` returned.
/// - An owned value stays owned. If the slice returned by `trim` has the same
///   length as the original vector, that vector is returned untouched;
///   otherwise the trimmed bytes are copied into a new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    // Borrowed data needs no allocation at all, so handle it up front.
    let buffer = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(vec) => vec,
    };

    let kept = trim(&buffer);
    if kept.len() == buffer.len() {
        // Same length: keep the original allocation.
        Cow::Owned(buffer)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1911
// Unit tests for the construction and mutation of `BytesStart`.
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag exposes its name and has the name's length.
    #[test]
    fn bytestart_create() {
        let b = BytesStart::new("test");
        // "test" is 4 bytes long.
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }

    /// Renaming a tag replaces only the name and keeps the attributes.
    #[test]
    fn bytestart_set_name() {
        let mut b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
        // No attributes have been pushed yet.
        assert_eq!(b.attributes_raw(), b"");
        b.push_attribute(("x", "a"));
        // 4 bytes of name + 6 bytes of ` x="a"`.
        assert_eq!(b.len(), 10);
        assert_eq!(b.attributes_raw(), b" x=\"a\"");
        b.set_name(b"g");
        // 1 byte of name + the same 6 bytes of attributes.
        assert_eq!(b.len(), 7);
        assert_eq!(b.name(), QName(b"g"));
    }

    /// Clearing attributes leaves a tag that consists of the name only.
    #[test]
    fn bytestart_clear_attributes() {
        let mut b = BytesStart::new("test");
        // The same attribute is pushed twice; its value contains a quote.
        b.push_attribute(("x", "y\"z"));
        b.push_attribute(("x", "y\"z"));
        b.clear_attributes();
        assert!(b.attributes().next().is_none());
        // Back to the 4 bytes of "test".
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }
}