Skip to main content

quick_xml/events/
mod.rs

//! Defines zero-copy XML events used throughout this library.
//!
//! An XML event often represents part of an XML element.
//! They occur both during reading and writing and are
//! usually used with the stream-oriented API.
//!
//! For example, the XML element
//! ```xml
//! <name attr="value">Inner text</name>
//! ```
//! consists of the three events `Start`, `Text` and `End`.
//! They can also represent other parts of an XML document, like the
//! XML declaration. Each event usually contains further information,
//! like the tag name, the attributes or the inner text.
//!
//! See [`Event`] for a list of all possible events.
//!
//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
//! and [`Reader::read_event_into`]. You must listen
//! for the different types of events you are interested in.
//!
//! See [`Reader`] for further information.
//!
//! # Writing
//! When writing the XML document, you must create the XML element
//! by constructing the events it consists of and pass them to the writer
//! sequentially.
//!
//! See [`Writer`] for further information.
//!
//! [`Reader::read_event`]: crate::reader::Reader::read_event
//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
//! [`Reader`]: crate::reader::Reader
//! [`Writer`]: crate::writer::Writer
//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52    escape, minimal_escape, normalize_xml10_eols, normalize_xml11_eols, parse_number,
53    partial_escape, EscapeError,
54};
55use crate::name::{LocalName, QName};
56use crate::utils::{self, name_len, trim_xml_end, trim_xml_start, write_cow_string};
57use attributes::{AttrError, Attribute, Attributes};
58
59/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
60///
61/// The name can be accessed using the [`name`] or [`local_name`] methods.
62/// An iterator over the attributes is returned by the [`attributes`] method.
63///
64/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
65/// returns the content of this event between `<` and `>` or `/>`:
66///
67/// ```
68/// # use quick_xml::events::{BytesStart, Event};
69/// # use quick_xml::reader::Reader;
70/// # use pretty_assertions::assert_eq;
71/// // Remember, that \ at the end of string literal strips
72/// // all space characters to the first non-space character
73/// let mut reader = Reader::from_str("\
74///     <element a1 = 'val1' a2=\"val2\" />\
75///     <element a1 = 'val1' a2=\"val2\" >"
76/// );
77/// let content = "element a1 = 'val1' a2=\"val2\" ";
78/// let event = BytesStart::from_content(content, 7);
79///
80/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
81/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
82/// // deref coercion of &BytesStart to &[u8]
83/// assert_eq!(&event as &[u8], content.as_bytes());
84/// // AsRef<[u8]> for &T + deref coercion
85/// assert_eq!(event.as_ref(), content.as_bytes());
86/// ```
87///
88/// [`name`]: Self::name
89/// [`local_name`]: Self::local_name
90/// [`attributes`]: Self::attributes
91#[derive(Clone, Eq, PartialEq)]
92pub struct BytesStart<'a> {
93    /// content of the element, before any utf8 conversion
94    pub(crate) buf: Cow<'a, [u8]>,
95    /// end of the element name, the name starts at that the start of `buf`
96    pub(crate) name_len: usize,
97    /// Encoding used for `buf`
98    decoder: Decoder,
99}
100
101impl<'a> BytesStart<'a> {
102    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
103    #[inline]
104    pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
105        BytesStart {
106            buf: Cow::Borrowed(content),
107            name_len,
108            decoder,
109        }
110    }
111
112    /// Creates a new `BytesStart` from the given name.
113    ///
114    /// # Warning
115    ///
116    /// `name` must be a valid name.
117    #[inline]
118    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
119        let buf = str_cow_to_bytes(name);
120        BytesStart {
121            name_len: buf.len(),
122            buf,
123            decoder: Decoder::utf8(),
124        }
125    }
126
127    /// Creates a new `BytesStart` from the given content (name + attributes).
128    ///
129    /// # Warning
130    ///
131    /// `&content[..name_len]` must be a valid name, and the remainder of `content`
132    /// must be correctly-formed attributes. Neither are checked, it is possible
133    /// to generate invalid XML if `content` or `name_len` are incorrect.
134    #[inline]
135    pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
136        BytesStart {
137            buf: str_cow_to_bytes(content),
138            name_len,
139            decoder: Decoder::utf8(),
140        }
141    }
142
143    /// Converts the event into an owned event.
144    pub fn into_owned(self) -> BytesStart<'static> {
145        BytesStart {
146            buf: Cow::Owned(self.buf.into_owned()),
147            name_len: self.name_len,
148            decoder: self.decoder,
149        }
150    }
151
152    /// Converts the event into an owned event without taking ownership of Event
153    pub fn to_owned(&self) -> BytesStart<'static> {
154        BytesStart {
155            buf: Cow::Owned(self.buf.clone().into_owned()),
156            name_len: self.name_len,
157            decoder: self.decoder,
158        }
159    }
160
161    /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
162    ///
163    /// # Example
164    ///
165    /// ```
166    /// use quick_xml::events::{BytesStart, Event};
167    /// # use quick_xml::writer::Writer;
168    /// # use quick_xml::Error;
169    ///
170    /// struct SomeStruct<'a> {
171    ///     attrs: BytesStart<'a>,
172    ///     // ...
173    /// }
174    /// # impl<'a> SomeStruct<'a> {
175    /// # fn example(&self) -> Result<(), Error> {
176    /// # let mut writer = Writer::new(Vec::new());
177    ///
178    /// writer.write_event(Event::Start(self.attrs.borrow()))?;
179    /// // ...
180    /// writer.write_event(Event::End(self.attrs.to_end()))?;
181    /// # Ok(())
182    /// # }}
183    /// ```
184    ///
185    /// [`to_end`]: Self::to_end
186    pub fn borrow(&self) -> BytesStart<'_> {
187        BytesStart {
188            buf: Cow::Borrowed(&self.buf),
189            name_len: self.name_len,
190            decoder: self.decoder,
191        }
192    }
193
194    /// Creates new paired close tag
195    #[inline]
196    pub fn to_end(&self) -> BytesEnd<'_> {
197        BytesEnd::from(self.name())
198    }
199
200    /// Get the decoder, used to decode bytes, read by the reader which produces
201    /// this event, to the strings.
202    ///
203    /// When event was created manually, encoding is UTF-8.
204    ///
205    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
206    /// defaults to UTF-8.
207    ///
208    /// [`encoding`]: ../index.html#encoding
209    #[inline]
210    pub const fn decoder(&self) -> Decoder {
211        self.decoder
212    }
213
214    /// Gets the undecoded raw tag name, as present in the input stream.
215    #[inline]
216    pub fn name(&self) -> QName<'_> {
217        QName(&self.buf[..self.name_len])
218    }
219
220    /// Gets the undecoded raw local tag name (excluding namespace) as present
221    /// in the input stream.
222    ///
223    /// All content up to and including the first `:` character is removed from the tag name.
224    #[inline]
225    pub fn local_name(&self) -> LocalName<'_> {
226        self.name().into()
227    }
228
229    /// Edit the name of the BytesStart in-place
230    ///
231    /// # Warning
232    ///
233    /// `name` must be a valid name.
234    pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
235        let bytes = self.buf.to_mut();
236        bytes.splice(..self.name_len, name.iter().cloned());
237        self.name_len = name.len();
238        self
239    }
240}
241
242/// Attribute-related methods
243impl<'a> BytesStart<'a> {
244    /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
245    ///
246    /// The yielded items must be convertible to [`Attribute`] using `Into`.
247    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
248    where
249        I: IntoIterator,
250        I::Item: Into<Attribute<'b>>,
251    {
252        self.extend_attributes(attributes);
253        self
254    }
255
256    /// Add additional attributes to this tag using an iterator.
257    ///
258    /// The yielded items must be convertible to [`Attribute`] using `Into`.
259    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
260    where
261        I: IntoIterator,
262        I::Item: Into<Attribute<'b>>,
263    {
264        for attr in attributes {
265            self.push_attribute(attr);
266        }
267        self
268    }
269
270    /// Adds an attribute to this element.
271    pub fn push_attribute<'b, A>(&mut self, attr: A)
272    where
273        A: Into<Attribute<'b>>,
274    {
275        self.buf.to_mut().push(b' ');
276        self.push_attr(attr.into());
277    }
278
279    /// Remove all attributes from the ByteStart
280    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
281        self.buf.to_mut().truncate(self.name_len);
282        self
283    }
284
285    /// Returns an iterator over the attributes of this tag.
286    pub fn attributes(&self) -> Attributes<'_> {
287        Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
288    }
289
290    /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
291    pub fn html_attributes(&self) -> Attributes<'_> {
292        Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
293    }
294
295    /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
296    /// including the whitespace after the tag name if there is any.
297    #[inline]
298    pub fn attributes_raw(&self) -> &[u8] {
299        &self.buf[self.name_len..]
300    }
301
302    /// Try to get an attribute
303    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
304        &'a self,
305        attr_name: N,
306    ) -> Result<Option<Attribute<'a>>, AttrError> {
307        for a in self.attributes().with_checks(false) {
308            let a = a?;
309            if a.key.as_ref() == attr_name.as_ref() {
310                return Ok(Some(a));
311            }
312        }
313        Ok(None)
314    }
315
316    /// Adds an attribute to this element.
317    pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
318        let bytes = self.buf.to_mut();
319        bytes.extend_from_slice(attr.key.as_ref());
320        bytes.extend_from_slice(b"=\"");
321        // FIXME: need to escape attribute content
322        bytes.extend_from_slice(attr.value.as_ref());
323        bytes.push(b'"');
324    }
325
326    /// Adds new line in existing element
327    pub(crate) fn push_newline(&mut self) {
328        self.buf.to_mut().push(b'\n');
329    }
330
331    /// Adds indentation bytes in existing element
332    pub(crate) fn push_indent(&mut self, indent: &[u8]) {
333        self.buf.to_mut().extend_from_slice(indent);
334    }
335}
336
337impl<'a> Debug for BytesStart<'a> {
338    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
339        write!(f, "BytesStart {{ buf: ")?;
340        write_cow_string(f, &self.buf)?;
341        write!(f, ", name_len: {} }}", self.name_len)
342    }
343}
344
345impl<'a> Deref for BytesStart<'a> {
346    type Target = [u8];
347
348    fn deref(&self) -> &[u8] {
349        &self.buf
350    }
351}
352
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a tag with a non-empty, purely alphanumeric name and an
    /// arbitrary list of attributes. Any other name is rejected with
    /// `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let acceptable = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !acceptable {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        Ok(Self::new(name).with_attributes(attrs))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
369
370////////////////////////////////////////////////////////////////////////////////////////////////////
371
/// Data of a closing tag (`Event::End`): `</name>`.
///
/// Use the [`name`] or [`local_name`] methods to get the tag name.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `</` and `>`.
///
/// Note that the inner text will not contain a `>` character:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded bytes of the tag name
    name: Cow<'a, [u8]>,
}
408
409impl<'a> BytesEnd<'a> {
410    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
411    #[inline]
412    pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
413        BytesEnd { name }
414    }
415
416    /// Creates a new `BytesEnd` borrowing a slice.
417    ///
418    /// # Warning
419    ///
420    /// `name` must be a valid name.
421    #[inline]
422    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
423        Self::wrap(str_cow_to_bytes(name))
424    }
425
426    /// Converts the event into an owned event.
427    pub fn into_owned(self) -> BytesEnd<'static> {
428        BytesEnd {
429            name: Cow::Owned(self.name.into_owned()),
430        }
431    }
432
433    /// Converts the event into a borrowed event.
434    #[inline]
435    pub fn borrow(&self) -> BytesEnd<'_> {
436        BytesEnd {
437            name: Cow::Borrowed(&self.name),
438        }
439    }
440
441    /// Gets the undecoded raw tag name, as present in the input stream.
442    #[inline]
443    pub fn name(&self) -> QName<'_> {
444        QName(&self.name)
445    }
446
447    /// Gets the undecoded raw local tag name (excluding namespace) as present
448    /// in the input stream.
449    ///
450    /// All content up to and including the first `:` character is removed from the tag name.
451    #[inline]
452    pub fn local_name(&self) -> LocalName<'_> {
453        self.name().into()
454    }
455}
456
457impl<'a> Debug for BytesEnd<'a> {
458    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
459        write!(f, "BytesEnd {{ name: ")?;
460        write_cow_string(f, &self.name)?;
461        write!(f, " }}")
462    }
463}
464
465impl<'a> Deref for BytesEnd<'a> {
466    type Target = [u8];
467
468    fn deref(&self) -> &[u8] {
469        &self.name
470    }
471}
472
473impl<'a> From<QName<'a>> for BytesEnd<'a> {
474    #[inline]
475    fn from(name: QName<'a>) -> Self {
476        Self::wrap(name.into_inner().into())
477    }
478}
479
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
489
490////////////////////////////////////////////////////////////////////////////////////////////////////
491
492/// Data from various events (most notably, `Event::Text`).
493///
494/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
495/// returns the content of this event. In case of comment this is everything
496/// between `<!--` and `-->` and the text of comment may not contain `-->` inside
497/// (if [`Config::check_comments`] is set to `true`).
498/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
499/// (i.e. in case of DTD the first character is never space):
500///
501/// ```
502/// # use quick_xml::events::{BytesText, Event};
503/// # use quick_xml::reader::Reader;
504/// # use pretty_assertions::assert_eq;
505/// // Remember, that \ at the end of string literal strips
506/// // all space characters to the first non-space character
507/// let mut reader = Reader::from_str("\
508///     <!DOCTYPE comment or text >\
509///     comment or text \
510///     <!--comment or text -->"
511/// );
512/// let content = "comment or text ";
513/// let event = BytesText::new(content);
514///
515/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
516/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
517/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
518/// // deref coercion of &BytesText to &[u8]
519/// assert_eq!(&event as &[u8], content.as_bytes());
520/// // AsRef<[u8]> for &T + deref coercion
521/// assert_eq!(event.as_ref(), content.as_bytes());
522/// ```
523///
524/// [`Config::check_comments`]: crate::reader::Config::check_comments
525#[derive(Clone, Eq, PartialEq)]
526pub struct BytesText<'a> {
527    /// Escaped then encoded content of the event. Content is encoded in the XML
528    /// document encoding when event comes from the reader and should be in the
529    /// document encoding when event passed to the writer
530    content: Cow<'a, [u8]>,
531    /// Encoding in which the `content` is stored inside the event
532    decoder: Decoder,
533}
534
535impl<'a> BytesText<'a> {
536    /// Creates a new `BytesText` from a raw byte sequence as it appeared in th XML
537    /// source in the specified encoding.
538    #[inline]
539    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
540        Self {
541            content: content.into(),
542            decoder,
543        }
544    }
545
546    /// Creates a new `BytesText` from a raw string as it appeared in the XML source.
547    ///
548    /// # Warning
549    ///
550    /// `content` is not checked to not contain markup or entity references. Be warned
551    /// that writing such event may result to invalid XML if your content contains not
552    /// defined entity references or invalid XML markup.
553    ///
554    /// `content` may have any EOLs, they will be normalized when using [`xml_content()`] getters.
555    ///
556    /// [`xml_content()`]: Self::xml_content
557    #[inline]
558    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
559        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
560    }
561
562    /// Creates a new `BytesText` from a string.
563    ///
564    /// # Warning
565    ///
566    /// `content` will be escaped using the [`escape`] function, but that may change
567    /// in the future, because events produced by the reader never contains `&` or `<`,
568    /// and escaping of `>`, `"` and `'` is not required. If you want to preserve exact
569    /// content, use [`from_escaped()`] method, but be warned that writing such event
570    /// may result to invalid XML if your content contains not defined entity references
571    /// or invalid XML markup.
572    ///
573    /// `content` may have any EOLs, they will be normalized when using [`xml_content()`] getters.
574    ///
575    /// [`escape`]: crate::escape::escape
576    /// [`from_escaped()`]: Self::from_escaped
577    /// [`xml_content()`]: Self::xml_content
578    #[inline]
579    pub fn new(content: &'a str) -> Self {
580        Self::from_escaped(escape(content))
581    }
582
583    /// Ensures that all data is owned to extend the object's lifetime if
584    /// necessary.
585    #[inline]
586    pub fn into_owned(self) -> BytesText<'static> {
587        BytesText {
588            content: self.content.into_owned().into(),
589            decoder: self.decoder,
590        }
591    }
592
593    /// Extracts the inner `Cow` from the `BytesText` event container.
594    #[inline]
595    pub fn into_inner(self) -> Cow<'a, [u8]> {
596        self.content
597    }
598
599    /// Converts the event into a borrowed event.
600    #[inline]
601    pub fn borrow(&self) -> BytesText<'_> {
602        BytesText {
603            content: Cow::Borrowed(&self.content),
604            decoder: self.decoder,
605        }
606    }
607
608    /// Decodes the content of the event.
609    ///
610    /// This will allocate if the value is encoded in non-UTF-8 encoding.
611    ///
612    /// This method does not normalizes end-of-line characters as required by [specification].
613    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
614    ///
615    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
616    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
617        self.decoder.decode_cow(&self.content)
618    }
619
620    /// Decodes the content of the XML 1.0 or HTML event.
621    ///
622    /// When this event produced by the reader, it uses the encoding information
623    /// associated with that reader to interpret the raw bytes contained within
624    /// this text event.
625    ///
626    /// This will allocate if the value is encoded in non-UTF-8 encoding, or EOL normalization is required.
627    ///
628    /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
629    /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
630    ///
631    /// This method also can be used to get HTML content, because rules the same.
632    ///
633    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
634    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
635    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
636    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
637        self.decoder.content(&self.content, normalize_xml10_eols)
638    }
639
640    /// Decodes the content of the XML 1.1 event.
641    ///
642    /// When this event produced by the reader, it uses the encoding information
643    /// associated with that reader to interpret the raw bytes contained within
644    /// this text event.
645    ///
646    /// This will allocate if the value is encoded in non-UTF-8 encoding, or EOL normalization is required.
647    ///
648    /// Note, that this method should be used only if event represents XML 1.1 content,
649    /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
650    ///
651    /// To get HTML content use [`xml10_content()`](Self::xml10_content).
652    ///
653    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
654    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
655    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
656    pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
657        self.decoder.content(&self.content, normalize_xml11_eols)
658    }
659
660    /// Alias for [`xml11_content()`](Self::xml11_content).
661    #[inline]
662    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
663        self.xml11_content()
664    }
665
666    /// Alias for [`xml10_content()`](Self::xml10_content).
667    #[inline]
668    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
669        self.xml10_content()
670    }
671
672    /// Removes leading XML whitespace bytes from text content.
673    ///
674    /// Returns `true` if content is empty after that
675    pub fn inplace_trim_start(&mut self) -> bool {
676        self.content = trim_cow(
677            replace(&mut self.content, Cow::Borrowed(b"")),
678            trim_xml_start,
679        );
680        self.content.is_empty()
681    }
682
683    /// Removes trailing XML whitespace bytes from text content.
684    ///
685    /// Returns `true` if content is empty after that
686    pub fn inplace_trim_end(&mut self) -> bool {
687        self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
688        self.content.is_empty()
689    }
690}
691
692impl<'a> Debug for BytesText<'a> {
693    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
694        write!(f, "BytesText {{ content: ")?;
695        write_cow_string(f, &self.content)?;
696        write!(f, " }}")
697    }
698}
699
700impl<'a> Deref for BytesText<'a> {
701    type Target = [u8];
702
703    fn deref(&self) -> &[u8] {
704        &self.content
705    }
706}
707
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary, purely alphanumeric
    /// (possibly empty) string. Any other string is rejected with
    /// `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().all(char::is_alphanumeric) {
            Ok(Self::new(text))
        } else {
            Err(arbitrary::Error::IncorrectFormat)
        }
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
722
723////////////////////////////////////////////////////////////////////////////////////////////////////
724
725/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
726/// [convert](Self::escape) it to [`BytesText`].
727///
728/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
729/// returns the content of this event between `<![CDATA[` and `]]>`.
730///
731/// Note, that inner text will not contain `]]>` sequence inside:
732///
733/// ```
734/// # use quick_xml::events::{BytesCData, Event};
735/// # use quick_xml::reader::Reader;
736/// # use pretty_assertions::assert_eq;
737/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
738/// let content = " CDATA section ";
739/// let event = BytesCData::new(content);
740///
741/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
742/// // deref coercion of &BytesCData to &[u8]
743/// assert_eq!(&event as &[u8], content.as_bytes());
744/// // AsRef<[u8]> for &T + deref coercion
745/// assert_eq!(event.as_ref(), content.as_bytes());
746/// ```
747#[derive(Clone, Eq, PartialEq)]
748pub struct BytesCData<'a> {
749    content: Cow<'a, [u8]>,
750    /// Encoding in which the `content` is stored inside the event
751    decoder: Decoder,
752}
753
754impl<'a> BytesCData<'a> {
755    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
756    #[inline]
757    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
758        Self {
759            content: content.into(),
760            decoder,
761        }
762    }
763
764    /// Creates a new `BytesCData` from a string.
765    ///
766    /// # Warning
767    ///
768    /// `content` must not contain the `]]>` sequence. You can use
769    /// [`BytesCData::escaped`] to escape the content instead.
770    #[inline]
771    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
772        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
773    }
774
775    /// Creates an iterator of `BytesCData` from a string.
776    ///
777    /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
778    /// sections, splitting the `]]` and `>` characters, because the CDATA closing
779    /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
780    /// for each of those sections.
781    ///
782    /// # Examples
783    ///
784    /// ```
785    /// # use quick_xml::events::BytesCData;
786    /// # use pretty_assertions::assert_eq;
787    /// let content = "";
788    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
789    /// assert_eq!(cdata, &[BytesCData::new("")]);
790    ///
791    /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
792    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
793    /// assert_eq!(cdata, &[
794    ///     BytesCData::new("Certain tokens like ]]"),
795    ///     BytesCData::new("> can be difficult and <invalid>"),
796    /// ]);
797    ///
798    /// let content = "foo]]>bar]]>baz]]>quux";
799    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
800    /// assert_eq!(cdata, &[
801    ///     BytesCData::new("foo]]"),
802    ///     BytesCData::new(">bar]]"),
803    ///     BytesCData::new(">baz]]"),
804    ///     BytesCData::new(">quux"),
805    /// ]);
806    /// ```
807    #[inline]
808    pub fn escaped(content: &'a str) -> CDataIterator<'a> {
809        CDataIterator {
810            inner: utils::CDataIterator::new(content),
811        }
812    }
813
814    /// Ensures that all data is owned to extend the object's lifetime if
815    /// necessary.
816    #[inline]
817    pub fn into_owned(self) -> BytesCData<'static> {
818        BytesCData {
819            content: self.content.into_owned().into(),
820            decoder: self.decoder,
821        }
822    }
823
824    /// Extracts the inner `Cow` from the `BytesCData` event container.
825    #[inline]
826    pub fn into_inner(self) -> Cow<'a, [u8]> {
827        self.content
828    }
829
830    /// Converts the event into a borrowed event.
831    #[inline]
832    pub fn borrow(&self) -> BytesCData<'_> {
833        BytesCData {
834            content: Cow::Borrowed(&self.content),
835            decoder: self.decoder,
836        }
837    }
838
839    /// Converts this CDATA content to an escaped version, that can be written
840    /// as an usual text in XML.
841    ///
842    /// This function performs following replacements:
843    ///
844    /// | Character | Replacement
845    /// |-----------|------------
846    /// | `<`       | `&lt;`
847    /// | `>`       | `&gt;`
848    /// | `&`       | `&amp;`
849    /// | `'`       | `&apos;`
850    /// | `"`       | `&quot;`
851    pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
852        let decoded = self.decode()?;
853        Ok(BytesText::wrap(
854            match escape(decoded) {
855                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
856                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
857            },
858            Decoder::utf8(),
859        ))
860    }
861
862    /// Converts this CDATA content to an escaped version, that can be written
863    /// as an usual text in XML.
864    ///
865    /// In XML text content, it is allowed (though not recommended) to leave
866    /// the quote special characters `"` and `'` unescaped.
867    ///
868    /// This function performs following replacements:
869    ///
870    /// | Character | Replacement
871    /// |-----------|------------
872    /// | `<`       | `&lt;`
873    /// | `>`       | `&gt;`
874    /// | `&`       | `&amp;`
875    pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
876        let decoded = self.decode()?;
877        Ok(BytesText::wrap(
878            match partial_escape(decoded) {
879                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
880                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
881            },
882            Decoder::utf8(),
883        ))
884    }
885
886    /// Converts this CDATA content to an escaped version, that can be written
887    /// as an usual text in XML. This method escapes only those characters that
888    /// must be escaped according to the [specification].
889    ///
890    /// This function performs following replacements:
891    ///
892    /// | Character | Replacement
893    /// |-----------|------------
894    /// | `<`       | `&lt;`
895    /// | `&`       | `&amp;`
896    ///
897    /// [specification]: https://www.w3.org/TR/xml11/#syntax
898    pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
899        let decoded = self.decode()?;
900        Ok(BytesText::wrap(
901            match minimal_escape(decoded) {
902                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
903                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
904            },
905            Decoder::utf8(),
906        ))
907    }
908
909    /// Decodes the raw input byte content of the CDATA section into a string,
910    /// without performing XML entity escaping.
911    ///
912    /// When this event produced by the XML reader, it uses the encoding information
913    /// associated with that reader to interpret the raw bytes contained within this
914    /// CDATA event.
915    ///
916    /// This method does not normalizes end-of-line characters as required by [specification].
917    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
918    ///
919    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
920    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
921        self.decoder.decode_cow(&self.content)
922    }
923
924    /// Decodes the raw input byte content of the CDATA section of the XML 1.0 or
925    /// HTML event into a string.
926    ///
927    /// When this event produced by the reader, it uses the encoding information
928    /// associated with that reader to interpret the raw bytes contained within
929    /// this CDATA event.
930    ///
931    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
932    /// is required.
933    ///
934    /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
935    /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
936    ///
937    /// This method also can be used to get HTML content, because rules the same.
938    ///
939    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
940    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
941    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
942    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
943        self.decoder.content(&self.content, normalize_xml10_eols)
944    }
945
946    /// Decodes the raw input byte content of the CDATA section of the XML 1.1 event
947    /// into a string.
948    ///
949    /// When this event produced by the reader, it uses the encoding information
950    /// associated with that reader to interpret the raw bytes contained within
951    /// this CDATA event.
952    ///
953    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
954    /// is required.
955    ///
956    /// Note, that this method should be used only if event represents XML 1.1 content,
957    /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
958    ///
959    /// To get HTML content use [`xml10_content()`](Self::xml10_content).
960    ///
961    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
962    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
963    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
964    pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
965        self.decoder.content(&self.content, normalize_xml11_eols)
966    }
967
968    /// Alias for [`xml11_content()`](Self::xml11_content).
969    #[inline]
970    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
971        self.xml11_content()
972    }
973
974    /// Alias for [`xml10_content()`](Self::xml10_content).
975    #[inline]
976    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
977        self.xml10_content()
978    }
979}
980
981impl<'a> Debug for BytesCData<'a> {
982    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
983        write!(f, "BytesCData {{ content: ")?;
984        write_cow_string(f, &self.content)?;
985        write!(f, " }}")
986    }
987}
988
989impl<'a> Deref for BytesCData<'a> {
990    type Target = [u8];
991
992    fn deref(&self) -> &[u8] {
993        &self.content
994    }
995}
996
/// Builds an arbitrary `BytesCData` from an arbitrary string slice; the size
/// hint is therefore the one of `&str`.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        Ok(Self::new(text))
    }
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1006
1007/// Iterator over `CDATA` sections in a string.
1008///
1009/// This iterator is created by the [`BytesCData::escaped`] method.
1010#[derive(Debug, Clone)]
1011pub struct CDataIterator<'a> {
1012    inner: utils::CDataIterator<'a>,
1013}
1014
1015impl<'a> Iterator for CDataIterator<'a> {
1016    type Item = BytesCData<'a>;
1017
1018    fn next(&mut self) -> Option<BytesCData<'a>> {
1019        self.inner
1020            .next()
1021            .map(|slice| BytesCData::wrap(slice.as_bytes(), Decoder::utf8()))
1022    }
1023}
1024
1025impl FusedIterator for CDataIterator<'_> {}
1026
1027////////////////////////////////////////////////////////////////////////////////////////////////////
1028
1029/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
1030///
1031/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1032/// returns the content of this event between `<?` and `?>`.
1033///
1034/// Note, that inner text will not contain `?>` sequence inside:
1035///
1036/// ```
1037/// # use quick_xml::events::{BytesPI, Event};
1038/// # use quick_xml::reader::Reader;
1039/// # use pretty_assertions::assert_eq;
1040/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
1041/// let content = "processing instruction >:-<~ ";
1042/// let event = BytesPI::new(content);
1043///
1044/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
1045/// // deref coercion of &BytesPI to &[u8]
1046/// assert_eq!(&event as &[u8], content.as_bytes());
1047/// // AsRef<[u8]> for &T + deref coercion
1048/// assert_eq!(event.as_ref(), content.as_bytes());
1049/// ```
1050///
1051/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
1052#[derive(Clone, Eq, PartialEq)]
1053pub struct BytesPI<'a> {
1054    content: BytesStart<'a>,
1055}
1056
1057impl<'a> BytesPI<'a> {
1058    /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
1059    #[inline]
1060    pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
1061        Self {
1062            content: BytesStart::wrap(content, target_len, decoder),
1063        }
1064    }
1065
1066    /// Creates a new `BytesPI` from a string.
1067    ///
1068    /// # Warning
1069    ///
1070    /// `content` must not contain the `?>` sequence.
1071    #[inline]
1072    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1073        let buf = str_cow_to_bytes(content);
1074        let name_len = name_len(&buf);
1075        Self {
1076            content: BytesStart {
1077                buf,
1078                name_len,
1079                decoder: Decoder::utf8(),
1080            },
1081        }
1082    }
1083
1084    /// Ensures that all data is owned to extend the object's lifetime if
1085    /// necessary.
1086    #[inline]
1087    pub fn into_owned(self) -> BytesPI<'static> {
1088        BytesPI {
1089            content: self.content.into_owned(),
1090        }
1091    }
1092
1093    /// Extracts the inner `Cow` from the `BytesPI` event container.
1094    #[inline]
1095    pub fn into_inner(self) -> Cow<'a, [u8]> {
1096        self.content.buf
1097    }
1098
1099    /// Converts the event into a borrowed event.
1100    #[inline]
1101    pub fn borrow(&self) -> BytesPI<'_> {
1102        BytesPI {
1103            content: self.content.borrow(),
1104        }
1105    }
1106
1107    /// A target used to identify the application to which the instruction is directed.
1108    ///
1109    /// # Example
1110    ///
1111    /// ```
1112    /// # use pretty_assertions::assert_eq;
1113    /// use quick_xml::events::BytesPI;
1114    ///
1115    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1116    /// assert_eq!(instruction.target(), b"xml-stylesheet");
1117    /// ```
1118    #[inline]
1119    pub fn target(&self) -> &[u8] {
1120        self.content.name().0
1121    }
1122
1123    /// Content of the processing instruction. Contains everything between target
1124    /// name and the end of the instruction. A direct consequence is that the first
1125    /// character is always a space character.
1126    ///
1127    /// # Example
1128    ///
1129    /// ```
1130    /// # use pretty_assertions::assert_eq;
1131    /// use quick_xml::events::BytesPI;
1132    ///
1133    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1134    /// assert_eq!(instruction.content(), br#" href="style.css""#);
1135    /// ```
1136    #[inline]
1137    pub fn content(&self) -> &[u8] {
1138        self.content.attributes_raw()
1139    }
1140
1141    /// A view of the processing instructions' content as a list of key-value pairs.
1142    ///
1143    /// Key-value pairs are used in some processing instructions, for example in
1144    /// `<?xml-stylesheet?>`.
1145    ///
1146    /// Returned iterator does not validate attribute values as may required by
1147    /// target's rules. For example, it doesn't check that substring `?>` is not
1148    /// present in the attribute value. That shouldn't be the problem when event
1149    /// is produced by the reader, because reader detects end of processing instruction
1150    /// by the first `?>` sequence, as required by the specification, and therefore
1151    /// this sequence cannot appear inside it.
1152    ///
1153    /// # Example
1154    ///
1155    /// ```
1156    /// # use pretty_assertions::assert_eq;
1157    /// use std::borrow::Cow;
1158    /// use quick_xml::events::attributes::Attribute;
1159    /// use quick_xml::events::BytesPI;
1160    /// use quick_xml::name::QName;
1161    ///
1162    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1163    /// for attr in instruction.attributes() {
1164    ///     assert_eq!(attr, Ok(Attribute {
1165    ///         key: QName(b"href"),
1166    ///         value: Cow::Borrowed(b"style.css"),
1167    ///     }));
1168    /// }
1169    /// ```
1170    #[inline]
1171    pub fn attributes(&self) -> Attributes<'_> {
1172        self.content.attributes()
1173    }
1174}
1175
1176impl<'a> Debug for BytesPI<'a> {
1177    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1178        write!(f, "BytesPI {{ content: ")?;
1179        write_cow_string(f, &self.content.buf)?;
1180        write!(f, " }}")
1181    }
1182}
1183
1184impl<'a> Deref for BytesPI<'a> {
1185    type Target = [u8];
1186
1187    fn deref(&self) -> &[u8] {
1188        &self.content
1189    }
1190}
1191
/// Builds an arbitrary `BytesPI` from an arbitrary string slice; the size
/// hint is therefore the one of `&str`.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        Ok(Self::new(text))
    }
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1201
1202////////////////////////////////////////////////////////////////////////////////////////////////////
1203
1204/// An XML declaration (`Event::Decl`).
1205///
1206/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
1207///
1208/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1209/// returns the content of this event between `<?` and `?>`.
1210///
1211/// Note, that inner text will not contain `?>` sequence inside:
1212///
1213/// ```
1214/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
1215/// # use quick_xml::reader::Reader;
1216/// # use pretty_assertions::assert_eq;
1217/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
1218/// let content = "xml version = '1.0' ";
1219/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
1220///
1221/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
1222/// // deref coercion of &BytesDecl to &[u8]
1223/// assert_eq!(&event as &[u8], content.as_bytes());
1224/// // AsRef<[u8]> for &T + deref coercion
1225/// assert_eq!(event.as_ref(), content.as_bytes());
1226/// ```
1227#[derive(Clone, Debug, Eq, PartialEq)]
1228pub struct BytesDecl<'a> {
1229    content: BytesStart<'a>,
1230}
1231
1232impl<'a> BytesDecl<'a> {
1233    /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1234    /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1235    /// attribute.
1236    ///
1237    /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1238    /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1239    /// the double quote character is not allowed in any of the attribute values.
1240    pub fn new(
1241        version: &str,
1242        encoding: Option<&str>,
1243        standalone: Option<&str>,
1244    ) -> BytesDecl<'static> {
1245        // Compute length of the buffer based on supplied attributes
1246        // ' encoding=""'   => 12
1247        let encoding_attr_len = if let Some(xs) = encoding {
1248            12 + xs.len()
1249        } else {
1250            0
1251        };
1252        // ' standalone=""' => 14
1253        let standalone_attr_len = if let Some(xs) = standalone {
1254            14 + xs.len()
1255        } else {
1256            0
1257        };
1258        // 'xml version=""' => 14
1259        let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1260
1261        buf.push_str("xml version=\"");
1262        buf.push_str(version);
1263
1264        if let Some(encoding_val) = encoding {
1265            buf.push_str("\" encoding=\"");
1266            buf.push_str(encoding_val);
1267        }
1268
1269        if let Some(standalone_val) = standalone {
1270            buf.push_str("\" standalone=\"");
1271            buf.push_str(standalone_val);
1272        }
1273        buf.push('"');
1274
1275        BytesDecl {
1276            content: BytesStart::from_content(buf, 3),
1277        }
1278    }
1279
1280    /// Creates a `BytesDecl` from a `BytesStart`
1281    pub const fn from_start(start: BytesStart<'a>) -> Self {
1282        Self { content: start }
1283    }
1284
1285    /// Gets xml version, excluding quotes (`'` or `"`).
1286    ///
1287    /// According to the [grammar], the version *must* be the first thing in the declaration.
1288    /// This method tries to extract the first thing in the declaration and return it.
1289    /// In case of multiple attributes value of the first one is returned.
1290    ///
1291    /// If version is missed in the declaration, or the first thing is not a version,
1292    /// [`IllFormedError::MissingDeclVersion`] will be returned.
1293    ///
1294    /// # Examples
1295    ///
1296    /// ```
1297    /// use quick_xml::errors::{Error, IllFormedError};
1298    /// use quick_xml::events::{BytesDecl, BytesStart};
1299    ///
1300    /// // <?xml version='1.1'?>
1301    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1302    /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1303    ///
1304    /// // <?xml version='1.0' version='1.1'?>
1305    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1306    /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1307    ///
1308    /// // <?xml encoding='utf-8'?>
1309    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1310    /// match decl.version() {
1311    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1312    ///     _ => assert!(false),
1313    /// }
1314    ///
1315    /// // <?xml encoding='utf-8' version='1.1'?>
1316    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1317    /// match decl.version() {
1318    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1319    ///     _ => assert!(false),
1320    /// }
1321    ///
1322    /// // <?xml?>
1323    /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1324    /// match decl.version() {
1325    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1326    ///     _ => assert!(false),
1327    /// }
1328    /// ```
1329    ///
1330    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1331    pub fn version(&self) -> Result<Cow<'_, [u8]>, Error> {
1332        // The version *must* be the first thing in the declaration.
1333        match self.content.attributes().with_checks(false).next() {
1334            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1335            // first attribute was not "version"
1336            Some(Ok(a)) => {
1337                let found = from_utf8(a.key.as_ref())
1338                    .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1339                    .to_string();
1340                Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1341                    found,
1342                ))))
1343            }
1344            // error parsing attributes
1345            Some(Err(e)) => Err(e.into()),
1346            // no attributes
1347            None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1348        }
1349    }
1350
1351    /// Gets xml encoding, excluding quotes (`'` or `"`).
1352    ///
1353    /// Although according to the [grammar] encoding must appear before `"standalone"`
1354    /// and after `"version"`, this method does not check that. The first occurrence
1355    /// of the attribute will be returned even if there are several. Also, method does
1356    /// not restrict symbols that can forming the encoding, so the returned encoding
1357    /// name may not correspond to the grammar.
1358    ///
1359    /// # Examples
1360    ///
1361    /// ```
1362    /// use std::borrow::Cow;
1363    /// use quick_xml::Error;
1364    /// use quick_xml::events::{BytesDecl, BytesStart};
1365    ///
1366    /// // <?xml version='1.1'?>
1367    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1368    /// assert!(decl.encoding().is_none());
1369    ///
1370    /// // <?xml encoding='utf-8'?>
1371    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1372    /// match decl.encoding() {
1373    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1374    ///     _ => assert!(false),
1375    /// }
1376    ///
1377    /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1378    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1379    /// match decl.encoding() {
1380    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1381    ///     _ => assert!(false),
1382    /// }
1383    /// ```
1384    ///
1385    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1386    pub fn encoding(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1387        self.content
1388            .try_get_attribute("encoding")
1389            .map(|a| a.map(|a| a.value))
1390            .transpose()
1391    }
1392
1393    /// Gets xml standalone, excluding quotes (`'` or `"`).
1394    ///
1395    /// Although according to the [grammar] standalone flag must appear after `"version"`
1396    /// and `"encoding"`, this method does not check that. The first occurrence of the
1397    /// attribute will be returned even if there are several. Also, method does not
1398    /// restrict symbols that can forming the value, so the returned flag name may not
1399    /// correspond to the grammar.
1400    ///
1401    /// # Examples
1402    ///
1403    /// ```
1404    /// use std::borrow::Cow;
1405    /// use quick_xml::Error;
1406    /// use quick_xml::events::{BytesDecl, BytesStart};
1407    ///
1408    /// // <?xml version='1.1'?>
1409    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1410    /// assert!(decl.standalone().is_none());
1411    ///
1412    /// // <?xml standalone='yes'?>
1413    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1414    /// match decl.standalone() {
1415    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1416    ///     _ => assert!(false),
1417    /// }
1418    ///
1419    /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1420    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1421    /// match decl.standalone() {
1422    ///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1423    ///     _ => assert!(false),
1424    /// }
1425    /// ```
1426    ///
1427    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1428    pub fn standalone(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1429        self.content
1430            .try_get_attribute("standalone")
1431            .map(|a| a.map(|a| a.value))
1432            .transpose()
1433    }
1434
1435    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1436    /// algorithm.
1437    ///
1438    /// If encoding in not known, or `encoding` key was not found, returns `None`.
1439    /// In case of duplicated `encoding` key, encoding, corresponding to the first
1440    /// one, is returned.
1441    #[cfg(feature = "encoding")]
1442    pub fn encoder(&self) -> Option<&'static Encoding> {
1443        self.encoding()
1444            .and_then(|e| e.ok())
1445            .and_then(|e| Encoding::for_label(&e))
1446    }
1447
1448    /// Converts the event into an owned event.
1449    pub fn into_owned(self) -> BytesDecl<'static> {
1450        BytesDecl {
1451            content: self.content.into_owned(),
1452        }
1453    }
1454
1455    /// Converts the event into a borrowed event.
1456    #[inline]
1457    pub fn borrow(&self) -> BytesDecl<'_> {
1458        BytesDecl {
1459            content: self.content.borrow(),
1460        }
1461    }
1462}
1463
1464impl<'a> Deref for BytesDecl<'a> {
1465    type Target = [u8];
1466
1467    fn deref(&self) -> &[u8] {
1468        &self.content
1469    }
1470}
1471
/// Builds an arbitrary `BytesDecl` from an arbitrary version string and
/// arbitrary optional encoding and standalone strings, drawn in that order.
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1486
1487////////////////////////////////////////////////////////////////////////////////////////////////////
1488
1489/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
1490///
1491/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1492/// returns the content of this event between `&` and `;`:
1493///
1494/// ```
1495/// # use quick_xml::events::{BytesRef, Event};
1496/// # use quick_xml::reader::Reader;
1497/// # use pretty_assertions::assert_eq;
1498/// let mut reader = Reader::from_str(r#"&entity;"#);
1499/// let content = "entity";
1500/// let event = BytesRef::new(content);
1501///
1502/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
1503/// // deref coercion of &BytesRef to &[u8]
1504/// assert_eq!(&event as &[u8], content.as_bytes());
1505/// // AsRef<[u8]> for &T + deref coercion
1506/// assert_eq!(event.as_ref(), content.as_bytes());
1507/// ```
1508#[derive(Clone, Eq, PartialEq)]
1509pub struct BytesRef<'a> {
1510    content: Cow<'a, [u8]>,
1511    /// Encoding in which the `content` is stored inside the event.
1512    decoder: Decoder,
1513}
1514
1515impl<'a> BytesRef<'a> {
1516    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
1517    #[inline]
1518    pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
1519        Self {
1520            content: Cow::Borrowed(content),
1521            decoder,
1522        }
1523    }
1524
1525    /// Creates a new `BytesRef` borrowing a slice.
1526    ///
1527    /// # Warning
1528    ///
1529    /// `name` must be a valid name.
1530    #[inline]
1531    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
1532        Self {
1533            content: str_cow_to_bytes(name),
1534            decoder: Decoder::utf8(),
1535        }
1536    }
1537
1538    /// Converts the event into an owned event.
1539    pub fn into_owned(self) -> BytesRef<'static> {
1540        BytesRef {
1541            content: Cow::Owned(self.content.into_owned()),
1542            decoder: self.decoder,
1543        }
1544    }
1545
1546    /// Extracts the inner `Cow` from the `BytesRef` event container.
1547    #[inline]
1548    pub fn into_inner(self) -> Cow<'a, [u8]> {
1549        self.content
1550    }
1551
1552    /// Converts the event into a borrowed event.
1553    #[inline]
1554    pub fn borrow(&self) -> BytesRef<'_> {
1555        BytesRef {
1556            content: Cow::Borrowed(&self.content),
1557            decoder: self.decoder,
1558        }
1559    }
1560
1561    /// Decodes the content of the event.
1562    ///
1563    /// This will allocate if the value is encoded in non-UTF-8 encoding.
1564    ///
1565    /// This method does not normalizes end-of-line characters as required by [specification].
1566    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
1567    ///
1568    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
1569    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
1570        self.decoder.decode_cow(&self.content)
1571    }
1572
1573    /// Decodes the content of the XML 1.0 or HTML event.
1574    ///
1575    /// When this event produced by the reader, it uses the encoding information
1576    /// associated with that reader to interpret the raw bytes contained within
1577    /// this general reference event.
1578    ///
1579    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1580    /// is required.
1581    ///
1582    /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
1583    /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1584    ///
1585    /// This method also can be used to get HTML content, because rules the same.
1586    ///
1587    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1588    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1589    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1590    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1591        self.decoder.content(&self.content, normalize_xml10_eols)
1592    }
1593
1594    /// Decodes the content of the XML 1.1 event.
1595    ///
1596    /// When this event produced by the reader, it uses the encoding information
1597    /// associated with that reader to interpret the raw bytes contained within
1598    /// this general reference event.
1599    ///
1600    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1601    /// is required.
1602    ///
1603    /// Note, that this method should be used only if event represents XML 1.1 content,
1604    /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1605    ///
1606    /// To get HTML content use [`xml10_content()`](Self::xml10_content).
1607    ///
1608    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1609    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1610    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1611    pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1612        self.decoder.content(&self.content, normalize_xml11_eols)
1613    }
1614
1615    /// Alias for [`xml11_content()`](Self::xml11_content).
1616    #[inline]
1617    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1618        self.xml11_content()
1619    }
1620
1621    /// Alias for [`xml10_content()`](Self::xml10_content).
1622    #[inline]
1623    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1624        self.xml10_content()
1625    }
1626
1627    /// Returns `true` if the specified reference represents the character reference
1628    /// (`&#<number>;`).
1629    ///
1630    /// ```
1631    /// # use quick_xml::events::BytesRef;
1632    /// # use pretty_assertions::assert_eq;
1633    /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1634    /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1635    /// assert_eq!(BytesRef::new("lt"  ).is_char_ref(), false);
1636    /// ```
1637    pub fn is_char_ref(&self) -> bool {
1638        matches!(self.content.first(), Some(b'#'))
1639    }
1640
1641    /// If this reference represents character reference, then resolves it and
1642    /// returns the character, otherwise returns `None`.
1643    ///
1644    /// This method does not check if character is allowed for XML, in other words,
1645    /// well-formedness constraint [WFC: Legal Char] is not enforced.
1646    /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1647    ///
1648    /// ```
1649    /// # use quick_xml::events::BytesRef;
1650    /// # use pretty_assertions::assert_eq;
1651    /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1652    /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1653    /// assert_eq!(BytesRef::new("lt"  ).resolve_char_ref().unwrap(), None);
1654    /// ```
1655    ///
1656    /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1657    pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1658        if let Some(num) = self.decode()?.strip_prefix('#') {
1659            let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1660            return Ok(Some(ch));
1661        }
1662        Ok(None)
1663    }
1664}
1665
1666impl<'a> Debug for BytesRef<'a> {
1667    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1668        write!(f, "BytesRef {{ content: ")?;
1669        write_cow_string(f, &self.content)?;
1670        write!(f, " }}")
1671    }
1672}
1673
1674impl<'a> Deref for BytesRef<'a> {
1675    type Target = [u8];
1676
1677    fn deref(&self) -> &[u8] {
1678        &self.content
1679    }
1680}
1681
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Builds a reference from an arbitrary `&str` taken from `u`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reuses the size hint of `&str`, the only value this type is built from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1692
1693////////////////////////////////////////////////////////////////////////////////////////////////////
1694
1695/// Event emitted by [`Reader::read_event_into`].
1696///
1697/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
1698#[derive(Clone, Debug, Eq, PartialEq)]
1699#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
1700pub enum Event<'a> {
1701    /// Start tag (with attributes) `<tag attr="value">`.
1702    Start(BytesStart<'a>),
1703    /// End tag `</tag>`.
1704    End(BytesEnd<'a>),
1705    /// Empty element tag (with attributes) `<tag attr="value" />`.
1706    Empty(BytesStart<'a>),
1707    /// Escaped character data between tags.
1708    Text(BytesText<'a>),
1709    /// Unescaped character data stored in `<![CDATA[...]]>`.
1710    CData(BytesCData<'a>),
1711    /// Comment `<!-- ... -->`.
1712    Comment(BytesText<'a>),
1713    /// XML declaration `<?xml ...?>`.
1714    Decl(BytesDecl<'a>),
1715    /// Processing instruction `<?...?>`.
1716    PI(BytesPI<'a>),
1717    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
1718    DocType(BytesText<'a>),
1719    /// General reference `&entity;` in the textual data. Can be either an entity
1720    /// reference, or a character reference.
1721    GeneralRef(BytesRef<'a>),
1722    /// End of XML document.
1723    Eof,
1724}
1725
1726impl<'a> Event<'a> {
1727    /// Converts the event to an owned version, untied to the lifetime of
1728    /// buffer used when reading but incurring a new, separate allocation.
1729    pub fn into_owned(self) -> Event<'static> {
1730        match self {
1731            Event::Start(e) => Event::Start(e.into_owned()),
1732            Event::End(e) => Event::End(e.into_owned()),
1733            Event::Empty(e) => Event::Empty(e.into_owned()),
1734            Event::Text(e) => Event::Text(e.into_owned()),
1735            Event::Comment(e) => Event::Comment(e.into_owned()),
1736            Event::CData(e) => Event::CData(e.into_owned()),
1737            Event::Decl(e) => Event::Decl(e.into_owned()),
1738            Event::PI(e) => Event::PI(e.into_owned()),
1739            Event::DocType(e) => Event::DocType(e.into_owned()),
1740            Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1741            Event::Eof => Event::Eof,
1742        }
1743    }
1744
1745    /// Converts the event into a borrowed event.
1746    #[inline]
1747    pub fn borrow(&self) -> Event<'_> {
1748        match self {
1749            Event::Start(e) => Event::Start(e.borrow()),
1750            Event::End(e) => Event::End(e.borrow()),
1751            Event::Empty(e) => Event::Empty(e.borrow()),
1752            Event::Text(e) => Event::Text(e.borrow()),
1753            Event::Comment(e) => Event::Comment(e.borrow()),
1754            Event::CData(e) => Event::CData(e.borrow()),
1755            Event::Decl(e) => Event::Decl(e.borrow()),
1756            Event::PI(e) => Event::PI(e.borrow()),
1757            Event::DocType(e) => Event::DocType(e.borrow()),
1758            Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1759            Event::Eof => Event::Eof,
1760        }
1761    }
1762}
1763
1764impl<'a> Deref for Event<'a> {
1765    type Target = [u8];
1766
1767    fn deref(&self) -> &[u8] {
1768        match *self {
1769            Event::Start(ref e) | Event::Empty(ref e) => e,
1770            Event::End(ref e) => e,
1771            Event::Text(ref e) => e,
1772            Event::Decl(ref e) => e,
1773            Event::PI(ref e) => e,
1774            Event::CData(ref e) => e,
1775            Event::Comment(ref e) => e,
1776            Event::DocType(ref e) => e,
1777            Event::GeneralRef(ref e) => e,
1778            Event::Eof => &[],
1779        }
1780    }
1781}
1782
1783impl<'a> AsRef<Event<'a>> for Event<'a> {
1784    fn as_ref(&self) -> &Event<'a> {
1785        self
1786    }
1787}
1788
1789////////////////////////////////////////////////////////////////////////////////////////////////////
1790
/// Reinterprets string content as its bytes while keeping the ownership
/// flavour: a borrowed string gives a borrowed byte slice, and an owned
/// `String` is turned into its byte vector.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1798
/// Applies `trim` to the bytes of `value`, keeping the ownership flavour.
///
/// - Borrowed input stays borrowed: the sub-slice returned by `trim` is
///   handed back directly.
/// - Owned input stays owned: if `trim` returned a slice of a different
///   length, that slice is copied into a new vector; otherwise the original
///   buffer is returned as is.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    match value {
        Cow::Owned(buffer) => {
            let kept = trim(&buffer);
            if kept.len() == buffer.len() {
                // Nothing was cut off, so the buffer can be reused.
                Cow::Owned(buffer)
            } else {
                Cow::Owned(kept.to_vec())
            }
        }
        Cow::Borrowed(slice) => Cow::Borrowed(trim(slice)),
    }
}
1814
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag reports the length and name it was built with.
    #[test]
    fn bytestart_create() {
        let tag = BytesStart::new("test");
        assert_eq!(tag.name(), QName(b"test"));
        assert_eq!(tag.len(), 4);
    }

    /// Checks length, name and raw attributes before and after pushing an
    /// attribute, then length and name after renaming the tag.
    #[test]
    fn bytestart_set_name() {
        let mut tag = BytesStart::new("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
        assert_eq!(tag.attributes_raw(), b"");

        // 4 bytes of the name plus the 6 bytes of ` x="a"`.
        tag.push_attribute(("x", "a"));
        assert_eq!(tag.len(), 10);
        assert_eq!(tag.attributes_raw(), b" x=\"a\"");

        // 1 byte of the new name plus the same 6 bytes.
        tag.set_name(b"g");
        assert_eq!(tag.len(), 7);
        assert_eq!(tag.name(), QName(b"g"));
    }

    /// After `clear_attributes` no attributes are iterated and the length and
    /// name match the bare tag again.
    #[test]
    fn bytestart_clear_attributes() {
        let mut tag = BytesStart::new("test");
        tag.push_attribute(("x", "y\"z"));
        tag.push_attribute(("x", "y\"z"));

        tag.clear_attributes();

        assert!(tag.attributes().next().is_none());
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
    }
}