quick_xml/events/
mod.rs

1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
//! Events occur both during reading and writing and are
//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
//! and [`Reader::read_event_into`]. You must match on
//! the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52    escape, minimal_escape, normalize_xml10_eols, normalize_xml11_eols, parse_number,
53    partial_escape, EscapeError,
54};
55use crate::name::{LocalName, QName};
56use crate::utils::{self, name_len, trim_xml_end, trim_xml_start, write_cow_string};
57use attributes::{AttrError, Attribute, Attributes};
58
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<` and `>` or `/>`:
///
/// ```
/// # use quick_xml::events::{BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
///     <element a1 = 'val1' a2=\"val2\" />\
///     <element a1 = 'val1' a2=\"val2\" >"
/// );
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesStart::from_content(content, 7);
///
/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
/// // deref coercion of &BytesStart to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (the tag name followed by the raw attributes),
    /// before any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// Length of the element name in bytes. The name starts at the start of
    /// `buf`, so it occupies `buf[..name_len]`; everything after it is the
    /// raw attributes part
    pub(crate) name_len: usize,
    /// Encoding used for `buf`
    decoder: Decoder,
}
100
101impl<'a> BytesStart<'a> {
102    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
103    #[inline]
104    pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
105        BytesStart {
106            buf: Cow::Borrowed(content),
107            name_len,
108            decoder,
109        }
110    }
111
112    /// Creates a new `BytesStart` from the given name.
113    ///
114    /// # Warning
115    ///
116    /// `name` must be a valid name.
117    #[inline]
118    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
119        let buf = str_cow_to_bytes(name);
120        BytesStart {
121            name_len: buf.len(),
122            buf,
123            decoder: Decoder::utf8(),
124        }
125    }
126
127    /// Creates a new `BytesStart` from the given content (name + attributes).
128    ///
129    /// # Warning
130    ///
131    /// `&content[..name_len]` must be a valid name, and the remainder of `content`
132    /// must be correctly-formed attributes. Neither are checked, it is possible
133    /// to generate invalid XML if `content` or `name_len` are incorrect.
134    #[inline]
135    pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
136        BytesStart {
137            buf: str_cow_to_bytes(content),
138            name_len,
139            decoder: Decoder::utf8(),
140        }
141    }
142
143    /// Converts the event into an owned event.
144    pub fn into_owned(self) -> BytesStart<'static> {
145        BytesStart {
146            buf: Cow::Owned(self.buf.into_owned()),
147            name_len: self.name_len,
148            decoder: self.decoder,
149        }
150    }
151
152    /// Converts the event into an owned event without taking ownership of Event
153    pub fn to_owned(&self) -> BytesStart<'static> {
154        BytesStart {
155            buf: Cow::Owned(self.buf.clone().into_owned()),
156            name_len: self.name_len,
157            decoder: self.decoder,
158        }
159    }
160
161    /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
162    ///
163    /// # Example
164    ///
165    /// ```
166    /// use quick_xml::events::{BytesStart, Event};
167    /// # use quick_xml::writer::Writer;
168    /// # use quick_xml::Error;
169    ///
170    /// struct SomeStruct<'a> {
171    ///     attrs: BytesStart<'a>,
172    ///     // ...
173    /// }
174    /// # impl<'a> SomeStruct<'a> {
175    /// # fn example(&self) -> Result<(), Error> {
176    /// # let mut writer = Writer::new(Vec::new());
177    ///
178    /// writer.write_event(Event::Start(self.attrs.borrow()))?;
179    /// // ...
180    /// writer.write_event(Event::End(self.attrs.to_end()))?;
181    /// # Ok(())
182    /// # }}
183    /// ```
184    ///
185    /// [`to_end`]: Self::to_end
186    pub fn borrow(&self) -> BytesStart<'_> {
187        BytesStart {
188            buf: Cow::Borrowed(&self.buf),
189            name_len: self.name_len,
190            decoder: self.decoder,
191        }
192    }
193
194    /// Creates new paired close tag
195    #[inline]
196    pub fn to_end(&self) -> BytesEnd<'_> {
197        BytesEnd::from(self.name())
198    }
199
200    /// Get the decoder, used to decode bytes, read by the reader which produces
201    /// this event, to the strings.
202    ///
203    /// When event was created manually, encoding is UTF-8.
204    ///
205    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
206    /// defaults to UTF-8.
207    ///
208    /// [`encoding`]: ../index.html#encoding
209    #[inline]
210    pub const fn decoder(&self) -> Decoder {
211        self.decoder
212    }
213
214    /// Gets the undecoded raw tag name, as present in the input stream.
215    #[inline]
216    pub fn name(&self) -> QName<'_> {
217        QName(&self.buf[..self.name_len])
218    }
219
220    /// Gets the undecoded raw local tag name (excluding namespace) as present
221    /// in the input stream.
222    ///
223    /// All content up to and including the first `:` character is removed from the tag name.
224    #[inline]
225    pub fn local_name(&self) -> LocalName<'_> {
226        self.name().into()
227    }
228
229    /// Edit the name of the BytesStart in-place
230    ///
231    /// # Warning
232    ///
233    /// `name` must be a valid name.
234    pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
235        let bytes = self.buf.to_mut();
236        bytes.splice(..self.name_len, name.iter().cloned());
237        self.name_len = name.len();
238        self
239    }
240}
241
242/// Attribute-related methods
243impl<'a> BytesStart<'a> {
244    /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
245    ///
246    /// The yielded items must be convertible to [`Attribute`] using `Into`.
247    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
248    where
249        I: IntoIterator,
250        I::Item: Into<Attribute<'b>>,
251    {
252        self.extend_attributes(attributes);
253        self
254    }
255
256    /// Add additional attributes to this tag using an iterator.
257    ///
258    /// The yielded items must be convertible to [`Attribute`] using `Into`.
259    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
260    where
261        I: IntoIterator,
262        I::Item: Into<Attribute<'b>>,
263    {
264        for attr in attributes {
265            self.push_attribute(attr);
266        }
267        self
268    }
269
270    /// Adds an attribute to this element.
271    pub fn push_attribute<'b, A>(&mut self, attr: A)
272    where
273        A: Into<Attribute<'b>>,
274    {
275        self.buf.to_mut().push(b' ');
276        self.push_attr(attr.into());
277    }
278
279    /// Remove all attributes from the ByteStart
280    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
281        self.buf.to_mut().truncate(self.name_len);
282        self
283    }
284
285    /// Returns an iterator over the attributes of this tag.
286    pub fn attributes(&self) -> Attributes<'_> {
287        Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
288    }
289
290    /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
291    pub fn html_attributes(&self) -> Attributes<'_> {
292        Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
293    }
294
295    /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
296    /// including the whitespace after the tag name if there is any.
297    #[inline]
298    pub fn attributes_raw(&self) -> &[u8] {
299        &self.buf[self.name_len..]
300    }
301
302    /// Try to get an attribute
303    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
304        &'a self,
305        attr_name: N,
306    ) -> Result<Option<Attribute<'a>>, AttrError> {
307        for a in self.attributes().with_checks(false) {
308            let a = a?;
309            if a.key.as_ref() == attr_name.as_ref() {
310                return Ok(Some(a));
311            }
312        }
313        Ok(None)
314    }
315
316    /// Adds an attribute to this element.
317    pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
318        let bytes = self.buf.to_mut();
319        bytes.extend_from_slice(attr.key.as_ref());
320        bytes.extend_from_slice(b"=\"");
321        // FIXME: need to escape attribute content
322        bytes.extend_from_slice(attr.value.as_ref());
323        bytes.push(b'"');
324    }
325
326    /// Adds new line in existing element
327    pub(crate) fn push_newline(&mut self) {
328        self.buf.to_mut().push(b'\n');
329    }
330
331    /// Adds indentation bytes in existing element
332    pub(crate) fn push_indent(&mut self, indent: &[u8]) {
333        self.buf.to_mut().extend_from_slice(indent);
334    }
335}
336
337impl<'a> Debug for BytesStart<'a> {
338    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
339        write!(f, "BytesStart {{ buf: ")?;
340        write_cow_string(f, &self.buf)?;
341        write!(f, ", name_len: {} }}", self.name_len)
342    }
343}
344
345impl<'a> Deref for BytesStart<'a> {
346    type Target = [u8];
347
348    fn deref(&self) -> &[u8] {
349        &self.buf
350    }
351}
352
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a tag whose name is a non-empty alphanumeric string, followed
    /// by arbitrary `(key, value)` attributes. Any other name is rejected with
    /// `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let valid_name = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !valid_name {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        Ok(Self::new(name).with_attributes(attrs))
    }

    /// Reports the size hint of the `&str` used for the name.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
369
370////////////////////////////////////////////////////////////////////////////////////////////////////
371
/// Closing tag data (`Event::End`): `</name>`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `</` and `>`.
///
/// Note that the content will never contain a `>` character:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw (undecoded) content of the closing tag; the whole content is
    /// treated as the tag name
    name: Cow<'a, [u8]>,
}
408
409impl<'a> BytesEnd<'a> {
410    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
411    #[inline]
412    pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
413        BytesEnd { name }
414    }
415
416    /// Creates a new `BytesEnd` borrowing a slice.
417    ///
418    /// # Warning
419    ///
420    /// `name` must be a valid name.
421    #[inline]
422    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
423        Self::wrap(str_cow_to_bytes(name))
424    }
425
426    /// Converts the event into an owned event.
427    pub fn into_owned(self) -> BytesEnd<'static> {
428        BytesEnd {
429            name: Cow::Owned(self.name.into_owned()),
430        }
431    }
432
433    /// Converts the event into a borrowed event.
434    #[inline]
435    pub fn borrow(&self) -> BytesEnd<'_> {
436        BytesEnd {
437            name: Cow::Borrowed(&self.name),
438        }
439    }
440
441    /// Gets the undecoded raw tag name, as present in the input stream.
442    #[inline]
443    pub fn name(&self) -> QName<'_> {
444        QName(&self.name)
445    }
446
447    /// Gets the undecoded raw local tag name (excluding namespace) as present
448    /// in the input stream.
449    ///
450    /// All content up to and including the first `:` character is removed from the tag name.
451    #[inline]
452    pub fn local_name(&self) -> LocalName<'_> {
453        self.name().into()
454    }
455}
456
457impl<'a> Debug for BytesEnd<'a> {
458    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
459        write!(f, "BytesEnd {{ name: ")?;
460        write_cow_string(f, &self.name)?;
461        write!(f, " }}")
462    }
463}
464
465impl<'a> Deref for BytesEnd<'a> {
466    type Target = [u8];
467
468    fn deref(&self) -> &[u8] {
469        &self.name
470    }
471}
472
473impl<'a> From<QName<'a>> for BytesEnd<'a> {
474    #[inline]
475    fn from(name: QName<'a>) -> Self {
476        Self::wrap(name.into_inner().into())
477    }
478}
479
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag from an arbitrary string; the name is not validated.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reports the size hint of the `&str` used for the name.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
489
490////////////////////////////////////////////////////////////////////////////////////////////////////
491
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally the data is also stored in escaped form.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event. In case of a comment this is everything
/// between `<!--` and `-->`, and the text of the comment will not contain `-->` inside.
/// In case of a DTD this is everything between `<!DOCTYPE` + spaces and the closing `>`
/// (i.e. in case of a DTD the first character is never a space):
///
/// ```
/// # use quick_xml::events::{BytesText, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
///     <!DOCTYPE comment or text >\
///     comment or text \
///     <!--comment or text -->"
/// );
/// let content = "comment or text ";
/// let event = BytesText::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
/// // deref coercion of &BytesText to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. The content is encoded in the XML
    /// document encoding when the event comes from the reader, and should be in the
    /// document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
532
533impl<'a> BytesText<'a> {
534    /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
535    #[inline]
536    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
537        Self {
538            content: content.into(),
539            decoder,
540        }
541    }
542
543    /// Creates a new `BytesText` from an escaped string.
544    #[inline]
545    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
546        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
547    }
548
549    /// Creates a new `BytesText` from a string. The string is expected not to
550    /// be escaped.
551    #[inline]
552    pub fn new(content: &'a str) -> Self {
553        Self::from_escaped(escape(content))
554    }
555
556    /// Ensures that all data is owned to extend the object's lifetime if
557    /// necessary.
558    #[inline]
559    pub fn into_owned(self) -> BytesText<'static> {
560        BytesText {
561            content: self.content.into_owned().into(),
562            decoder: self.decoder,
563        }
564    }
565
566    /// Extracts the inner `Cow` from the `BytesText` event container.
567    #[inline]
568    pub fn into_inner(self) -> Cow<'a, [u8]> {
569        self.content
570    }
571
572    /// Converts the event into a borrowed event.
573    #[inline]
574    pub fn borrow(&self) -> BytesText<'_> {
575        BytesText {
576            content: Cow::Borrowed(&self.content),
577            decoder: self.decoder,
578        }
579    }
580
581    /// Decodes the content of the event.
582    ///
583    /// This will allocate if the value contains any escape sequences or in
584    /// non-UTF-8 encoding.
585    ///
586    /// This method does not normalizes end-of-line characters as required by [specification].
587    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
588    ///
589    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
590    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
591        self.decoder.decode_cow(&self.content)
592    }
593
594    /// Decodes the content of the XML 1.0 or HTML event.
595    ///
596    /// When this event produced by the reader, it uses the encoding information
597    /// associated with that reader to interpret the raw bytes contained within
598    /// this text event.
599    ///
600    /// This will allocate if the value contains any escape sequences or in non-UTF-8
601    /// encoding, or EOL normalization is required.
602    ///
603    /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
604    /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
605    ///
606    /// This method also can be used to get HTML content, because rules the same.
607    ///
608    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
609    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
610    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
611    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
612        self.decoder.content(&self.content, normalize_xml10_eols)
613    }
614
615    /// Decodes the content of the XML 1.1 event.
616    ///
617    /// When this event produced by the reader, it uses the encoding information
618    /// associated with that reader to interpret the raw bytes contained within
619    /// this text event.
620    ///
621    /// This will allocate if the value contains any escape sequences or in non-UTF-8
622    /// encoding, or EOL normalization is required.
623    ///
624    /// Note, that this method should be used only if event represents XML 1.1 content,
625    /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
626    ///
627    /// To get HTML content use [`xml10_content()`](Self::xml10_content).
628    ///
629    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
630    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
631    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
632    pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
633        self.decoder.content(&self.content, normalize_xml11_eols)
634    }
635
636    /// Alias for [`xml11_content()`](Self::xml11_content).
637    #[inline]
638    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
639        self.xml11_content()
640    }
641
642    /// Alias for [`xml10_content()`](Self::xml10_content).
643    #[inline]
644    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
645        self.xml10_content()
646    }
647
648    /// Removes leading XML whitespace bytes from text content.
649    ///
650    /// Returns `true` if content is empty after that
651    pub fn inplace_trim_start(&mut self) -> bool {
652        self.content = trim_cow(
653            replace(&mut self.content, Cow::Borrowed(b"")),
654            trim_xml_start,
655        );
656        self.content.is_empty()
657    }
658
659    /// Removes trailing XML whitespace bytes from text content.
660    ///
661    /// Returns `true` if content is empty after that
662    pub fn inplace_trim_end(&mut self) -> bool {
663        self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
664        self.content.is_empty()
665    }
666}
667
668impl<'a> Debug for BytesText<'a> {
669    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
670        write!(f, "BytesText {{ content: ")?;
671        write_cow_string(f, &self.content)?;
672        write!(f, " }}")
673    }
674}
675
676impl<'a> Deref for BytesText<'a> {
677    type Target = [u8];
678
679    fn deref(&self) -> &[u8] {
680        &self.content
681    }
682}
683
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary string that consists only of
    /// alphanumeric characters (the empty string is accepted). Any other
    /// string is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().any(|c| !c.is_alphanumeric()) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(text))
    }

    /// Reports the size hint of the `&str` used for the content.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
698
699////////////////////////////////////////////////////////////////////////////////////////////////////
700
/// CDATA content contains unescaped data from the reader. If you want to write it as text,
/// [convert](Self::escape) it to [`BytesText`].
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<![CDATA[` and `]]>`.
///
/// Note that the content will never contain the `]]>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesCData, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
/// let content = " CDATA section ";
/// let event = BytesCData::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
/// // deref coercion of &BytesCData to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Raw (unescaped) content of the CDATA section, stored in the encoding
    /// described by `decoder`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
729
730impl<'a> BytesCData<'a> {
731    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
732    #[inline]
733    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
734        Self {
735            content: content.into(),
736            decoder,
737        }
738    }
739
740    /// Creates a new `BytesCData` from a string.
741    ///
742    /// # Warning
743    ///
744    /// `content` must not contain the `]]>` sequence. You can use
745    /// [`BytesCData::escaped`] to escape the content instead.
746    #[inline]
747    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
748        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
749    }
750
751    /// Creates an iterator of `BytesCData` from a string.
752    ///
753    /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
754    /// sections, splitting the `]]` and `>` characters, because the CDATA closing
755    /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
756    /// for each of those sections.
757    ///
758    /// # Examples
759    ///
760    /// ```
761    /// # use quick_xml::events::BytesCData;
762    /// # use pretty_assertions::assert_eq;
763    /// let content = "";
764    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
765    /// assert_eq!(cdata, &[BytesCData::new("")]);
766    ///
767    /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
768    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
769    /// assert_eq!(cdata, &[
770    ///     BytesCData::new("Certain tokens like ]]"),
771    ///     BytesCData::new("> can be difficult and <invalid>"),
772    /// ]);
773    ///
774    /// let content = "foo]]>bar]]>baz]]>quux";
775    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
776    /// assert_eq!(cdata, &[
777    ///     BytesCData::new("foo]]"),
778    ///     BytesCData::new(">bar]]"),
779    ///     BytesCData::new(">baz]]"),
780    ///     BytesCData::new(">quux"),
781    /// ]);
782    /// ```
783    #[inline]
784    pub fn escaped(content: &'a str) -> CDataIterator<'a> {
785        CDataIterator {
786            inner: utils::CDataIterator::new(content),
787        }
788    }
789
790    /// Ensures that all data is owned to extend the object's lifetime if
791    /// necessary.
792    #[inline]
793    pub fn into_owned(self) -> BytesCData<'static> {
794        BytesCData {
795            content: self.content.into_owned().into(),
796            decoder: self.decoder,
797        }
798    }
799
800    /// Extracts the inner `Cow` from the `BytesCData` event container.
801    #[inline]
802    pub fn into_inner(self) -> Cow<'a, [u8]> {
803        self.content
804    }
805
806    /// Converts the event into a borrowed event.
807    #[inline]
808    pub fn borrow(&self) -> BytesCData<'_> {
809        BytesCData {
810            content: Cow::Borrowed(&self.content),
811            decoder: self.decoder,
812        }
813    }
814
815    /// Converts this CDATA content to an escaped version, that can be written
816    /// as an usual text in XML.
817    ///
818    /// This function performs following replacements:
819    ///
820    /// | Character | Replacement
821    /// |-----------|------------
822    /// | `<`       | `&lt;`
823    /// | `>`       | `&gt;`
824    /// | `&`       | `&amp;`
825    /// | `'`       | `&apos;`
826    /// | `"`       | `&quot;`
827    pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
828        let decoded = self.decode()?;
829        Ok(BytesText::wrap(
830            match escape(decoded) {
831                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
832                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
833            },
834            Decoder::utf8(),
835        ))
836    }
837
838    /// Converts this CDATA content to an escaped version, that can be written
839    /// as an usual text in XML.
840    ///
841    /// In XML text content, it is allowed (though not recommended) to leave
842    /// the quote special characters `"` and `'` unescaped.
843    ///
844    /// This function performs following replacements:
845    ///
846    /// | Character | Replacement
847    /// |-----------|------------
848    /// | `<`       | `&lt;`
849    /// | `>`       | `&gt;`
850    /// | `&`       | `&amp;`
851    pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
852        let decoded = self.decode()?;
853        Ok(BytesText::wrap(
854            match partial_escape(decoded) {
855                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
856                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
857            },
858            Decoder::utf8(),
859        ))
860    }
861
862    /// Converts this CDATA content to an escaped version, that can be written
863    /// as an usual text in XML. This method escapes only those characters that
864    /// must be escaped according to the [specification].
865    ///
866    /// This function performs following replacements:
867    ///
868    /// | Character | Replacement
869    /// |-----------|------------
870    /// | `<`       | `&lt;`
871    /// | `&`       | `&amp;`
872    ///
873    /// [specification]: https://www.w3.org/TR/xml11/#syntax
874    pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
875        let decoded = self.decode()?;
876        Ok(BytesText::wrap(
877            match minimal_escape(decoded) {
878                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
879                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
880            },
881            Decoder::utf8(),
882        ))
883    }
884
885    /// Decodes the raw input byte content of the CDATA section into a string,
886    /// without performing XML entity escaping.
887    ///
    /// When this event is produced by the XML reader, it uses the encoding information
    /// associated with that reader to interpret the raw bytes contained within this
    /// CDATA event.
    ///
    /// This method does not normalize end-of-line characters as required by the [specification].
893    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
894    ///
895    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
896    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
897        self.decoder.decode_cow(&self.content)
898    }
899
900    /// Decodes the raw input byte content of the CDATA section of the XML 1.0 or
901    /// HTML event into a string.
902    ///
903    /// When this event produced by the reader, it uses the encoding information
904    /// associated with that reader to interpret the raw bytes contained within
905    /// this CDATA event.
906    ///
    /// This will allocate if the value is in a non-UTF-8 encoding, or if EOL normalization
    /// is required.
    ///
    /// Note that this method should be used only if the event represents XML 1.0 or HTML content,
    /// because the rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differ.
    ///
    /// This method can also be used to get HTML content, because the rules are the same.
914    ///
915    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
916    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
917    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
918    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
919        self.decoder.content(&self.content, normalize_xml10_eols)
920    }
921
922    /// Decodes the raw input byte content of the CDATA section of the XML 1.1 event
923    /// into a string.
924    ///
925    /// When this event produced by the reader, it uses the encoding information
926    /// associated with that reader to interpret the raw bytes contained within
927    /// this CDATA event.
928    ///
929    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
930    /// is required.
931    ///
932    /// Note, that this method should be used only if event represents XML 1.1 content,
933    /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
934    ///
935    /// To get HTML content use [`xml10_content()`](Self::xml10_content).
936    ///
937    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
938    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
939    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
940    pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
941        self.decoder.content(&self.content, normalize_xml11_eols)
942    }
943
944    /// Alias for [`xml11_content()`](Self::xml11_content).
945    #[inline]
946    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
947        self.xml11_content()
948    }
949
950    /// Alias for [`xml10_content()`](Self::xml10_content).
951    #[inline]
952    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
953        self.xml10_content()
954    }
955}
956
957impl<'a> Debug for BytesCData<'a> {
958    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
959        write!(f, "BytesCData {{ content: ")?;
960        write_cow_string(f, &self.content)?;
961        write!(f, " }}")
962    }
963}
964
965impl<'a> Deref for BytesCData<'a> {
966    type Target = [u8];
967
968    fn deref(&self) -> &[u8] {
969        &self.content
970    }
971}
972
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds an event from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// Same hint as the single `&str` consumed by `arbitrary`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
982
/// Iterator over `CDATA` sections in a string.
///
/// Each item is a [`BytesCData`] event wrapping one piece produced by the
/// inner iterator and tagged as UTF-8.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Debug, Clone)]
pub struct CDataIterator<'a> {
    /// Underlying iterator; every piece it yields is wrapped into a [`BytesCData`].
    inner: utils::CDataIterator<'a>,
}
990
991impl<'a> Iterator for CDataIterator<'a> {
992    type Item = BytesCData<'a>;
993
994    fn next(&mut self) -> Option<BytesCData<'a>> {
995        self.inner
996            .next()
997            .map(|slice| BytesCData::wrap(slice.as_bytes(), Decoder::utf8()))
998    }
999}
1000
// Marker impl: promises that `next` keeps returning `None` once exhausted.
// NOTE(review): this relies on `utils::CDataIterator` behaving that way — confirm.
impl FusedIterator for CDataIterator<'_> {}
1002
1003////////////////////////////////////////////////////////////////////////////////////////////////////
1004
1005/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
1006///
1007/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1008/// returns the content of this event between `<?` and `?>`.
1009///
1010/// Note, that inner text will not contain `?>` sequence inside:
1011///
1012/// ```
1013/// # use quick_xml::events::{BytesPI, Event};
1014/// # use quick_xml::reader::Reader;
1015/// # use pretty_assertions::assert_eq;
1016/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
1017/// let content = "processing instruction >:-<~ ";
1018/// let event = BytesPI::new(content);
1019///
1020/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
1021/// // deref coercion of &BytesPI to &[u8]
1022/// assert_eq!(&event as &[u8], content.as_bytes());
1023/// // AsRef<[u8]> for &T + deref coercion
1024/// assert_eq!(event.as_ref(), content.as_bytes());
1025/// ```
1026///
1027/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// Everything between `<?` and `?>`, stored as a [`BytesStart`]: its name
    /// part is the PI target and its raw attributes part is the PI content.
    content: BytesStart<'a>,
}
1032
1033impl<'a> BytesPI<'a> {
1034    /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
1035    #[inline]
1036    pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
1037        Self {
1038            content: BytesStart::wrap(content, target_len, decoder),
1039        }
1040    }
1041
1042    /// Creates a new `BytesPI` from a string.
1043    ///
1044    /// # Warning
1045    ///
1046    /// `content` must not contain the `?>` sequence.
1047    #[inline]
1048    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1049        let buf = str_cow_to_bytes(content);
1050        let name_len = name_len(&buf);
1051        Self {
1052            content: BytesStart {
1053                buf,
1054                name_len,
1055                decoder: Decoder::utf8(),
1056            },
1057        }
1058    }
1059
1060    /// Ensures that all data is owned to extend the object's lifetime if
1061    /// necessary.
1062    #[inline]
1063    pub fn into_owned(self) -> BytesPI<'static> {
1064        BytesPI {
1065            content: self.content.into_owned(),
1066        }
1067    }
1068
1069    /// Extracts the inner `Cow` from the `BytesPI` event container.
1070    #[inline]
1071    pub fn into_inner(self) -> Cow<'a, [u8]> {
1072        self.content.buf
1073    }
1074
1075    /// Converts the event into a borrowed event.
1076    #[inline]
1077    pub fn borrow(&self) -> BytesPI<'_> {
1078        BytesPI {
1079            content: self.content.borrow(),
1080        }
1081    }
1082
1083    /// A target used to identify the application to which the instruction is directed.
1084    ///
1085    /// # Example
1086    ///
1087    /// ```
1088    /// # use pretty_assertions::assert_eq;
1089    /// use quick_xml::events::BytesPI;
1090    ///
1091    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1092    /// assert_eq!(instruction.target(), b"xml-stylesheet");
1093    /// ```
1094    #[inline]
1095    pub fn target(&self) -> &[u8] {
1096        self.content.name().0
1097    }
1098
1099    /// Content of the processing instruction. Contains everything between target
1100    /// name and the end of the instruction. A direct consequence is that the first
1101    /// character is always a space character.
1102    ///
1103    /// # Example
1104    ///
1105    /// ```
1106    /// # use pretty_assertions::assert_eq;
1107    /// use quick_xml::events::BytesPI;
1108    ///
1109    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1110    /// assert_eq!(instruction.content(), br#" href="style.css""#);
1111    /// ```
1112    #[inline]
1113    pub fn content(&self) -> &[u8] {
1114        self.content.attributes_raw()
1115    }
1116
1117    /// A view of the processing instructions' content as a list of key-value pairs.
1118    ///
1119    /// Key-value pairs are used in some processing instructions, for example in
1120    /// `<?xml-stylesheet?>`.
1121    ///
1122    /// Returned iterator does not validate attribute values as may required by
1123    /// target's rules. For example, it doesn't check that substring `?>` is not
1124    /// present in the attribute value. That shouldn't be the problem when event
1125    /// is produced by the reader, because reader detects end of processing instruction
1126    /// by the first `?>` sequence, as required by the specification, and therefore
1127    /// this sequence cannot appear inside it.
1128    ///
1129    /// # Example
1130    ///
1131    /// ```
1132    /// # use pretty_assertions::assert_eq;
1133    /// use std::borrow::Cow;
1134    /// use quick_xml::events::attributes::Attribute;
1135    /// use quick_xml::events::BytesPI;
1136    /// use quick_xml::name::QName;
1137    ///
1138    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1139    /// for attr in instruction.attributes() {
1140    ///     assert_eq!(attr, Ok(Attribute {
1141    ///         key: QName(b"href"),
1142    ///         value: Cow::Borrowed(b"style.css"),
1143    ///     }));
1144    /// }
1145    /// ```
1146    #[inline]
1147    pub fn attributes(&self) -> Attributes<'_> {
1148        self.content.attributes()
1149    }
1150}
1151
1152impl<'a> Debug for BytesPI<'a> {
1153    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1154        write!(f, "BytesPI {{ content: ")?;
1155        write_cow_string(f, &self.content.buf)?;
1156        write!(f, " }}")
1157    }
1158}
1159
1160impl<'a> Deref for BytesPI<'a> {
1161    type Target = [u8];
1162
1163    fn deref(&self) -> &[u8] {
1164        &self.content
1165    }
1166}
1167
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds an event from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// Same hint as the single `&str` consumed by `arbitrary`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1177
1178////////////////////////////////////////////////////////////////////////////////////////////////////
1179
1180/// An XML declaration (`Event::Decl`).
1181///
1182/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
1183///
1184/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1185/// returns the content of this event between `<?` and `?>`.
1186///
1187/// Note, that inner text will not contain `?>` sequence inside:
1188///
1189/// ```
1190/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
1191/// # use quick_xml::reader::Reader;
1192/// # use pretty_assertions::assert_eq;
1193/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
1194/// let content = "xml version = '1.0' ";
1195/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
1196///
1197/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
1198/// // deref coercion of &BytesDecl to &[u8]
1199/// assert_eq!(&event as &[u8], content.as_bytes());
1200/// // AsRef<[u8]> for &T + deref coercion
1201/// assert_eq!(event.as_ref(), content.as_bytes());
1202/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// Declaration text between `<?` and `?>`, kept as a [`BytesStart`] so that
    /// `version`, `encoding` and `standalone` can be read through its attribute API.
    content: BytesStart<'a>,
}
1207
1208impl<'a> BytesDecl<'a> {
1209    /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1210    /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1211    /// attribute.
1212    ///
1213    /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1214    /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1215    /// the double quote character is not allowed in any of the attribute values.
1216    pub fn new(
1217        version: &str,
1218        encoding: Option<&str>,
1219        standalone: Option<&str>,
1220    ) -> BytesDecl<'static> {
1221        // Compute length of the buffer based on supplied attributes
1222        // ' encoding=""'   => 12
1223        let encoding_attr_len = if let Some(xs) = encoding {
1224            12 + xs.len()
1225        } else {
1226            0
1227        };
1228        // ' standalone=""' => 14
1229        let standalone_attr_len = if let Some(xs) = standalone {
1230            14 + xs.len()
1231        } else {
1232            0
1233        };
1234        // 'xml version=""' => 14
1235        let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1236
1237        buf.push_str("xml version=\"");
1238        buf.push_str(version);
1239
1240        if let Some(encoding_val) = encoding {
1241            buf.push_str("\" encoding=\"");
1242            buf.push_str(encoding_val);
1243        }
1244
1245        if let Some(standalone_val) = standalone {
1246            buf.push_str("\" standalone=\"");
1247            buf.push_str(standalone_val);
1248        }
1249        buf.push('"');
1250
1251        BytesDecl {
1252            content: BytesStart::from_content(buf, 3),
1253        }
1254    }
1255
1256    /// Creates a `BytesDecl` from a `BytesStart`
1257    pub const fn from_start(start: BytesStart<'a>) -> Self {
1258        Self { content: start }
1259    }
1260
1261    /// Gets xml version, excluding quotes (`'` or `"`).
1262    ///
1263    /// According to the [grammar], the version *must* be the first thing in the declaration.
1264    /// This method tries to extract the first thing in the declaration and return it.
1265    /// In case of multiple attributes value of the first one is returned.
1266    ///
    /// If the version is missing from the declaration, or the first thing is not a version,
    /// [`IllFormedError::MissingDeclVersion`] will be returned.
1269    ///
1270    /// # Examples
1271    ///
1272    /// ```
1273    /// use quick_xml::errors::{Error, IllFormedError};
1274    /// use quick_xml::events::{BytesDecl, BytesStart};
1275    ///
1276    /// // <?xml version='1.1'?>
1277    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1278    /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1279    ///
1280    /// // <?xml version='1.0' version='1.1'?>
1281    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1282    /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1283    ///
1284    /// // <?xml encoding='utf-8'?>
1285    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1286    /// match decl.version() {
1287    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1288    ///     _ => assert!(false),
1289    /// }
1290    ///
1291    /// // <?xml encoding='utf-8' version='1.1'?>
1292    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1293    /// match decl.version() {
1294    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1295    ///     _ => assert!(false),
1296    /// }
1297    ///
1298    /// // <?xml?>
1299    /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1300    /// match decl.version() {
1301    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1302    ///     _ => assert!(false),
1303    /// }
1304    /// ```
1305    ///
1306    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1307    pub fn version(&self) -> Result<Cow<'_, [u8]>, Error> {
1308        // The version *must* be the first thing in the declaration.
1309        match self.content.attributes().with_checks(false).next() {
1310            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1311            // first attribute was not "version"
1312            Some(Ok(a)) => {
1313                let found = from_utf8(a.key.as_ref())
1314                    .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1315                    .to_string();
1316                Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1317                    found,
1318                ))))
1319            }
1320            // error parsing attributes
1321            Some(Err(e)) => Err(e.into()),
1322            // no attributes
1323            None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1324        }
1325    }
1326
1327    /// Gets xml encoding, excluding quotes (`'` or `"`).
1328    ///
    /// Although according to the [grammar] encoding must appear before `"standalone"`
    /// and after `"version"`, this method does not check that. The first occurrence
    /// of the attribute will be returned even if there are several. Also, the method does
    /// not restrict the symbols that can form the encoding, so the returned encoding
    /// name may not correspond to the grammar.
1334    ///
1335    /// # Examples
1336    ///
1337    /// ```
1338    /// use std::borrow::Cow;
1339    /// use quick_xml::Error;
1340    /// use quick_xml::events::{BytesDecl, BytesStart};
1341    ///
1342    /// // <?xml version='1.1'?>
1343    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1344    /// assert!(decl.encoding().is_none());
1345    ///
1346    /// // <?xml encoding='utf-8'?>
1347    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1348    /// match decl.encoding() {
1349    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1350    ///     _ => assert!(false),
1351    /// }
1352    ///
1353    /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1354    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1355    /// match decl.encoding() {
1356    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1357    ///     _ => assert!(false),
1358    /// }
1359    /// ```
1360    ///
1361    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1362    pub fn encoding(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1363        self.content
1364            .try_get_attribute("encoding")
1365            .map(|a| a.map(|a| a.value))
1366            .transpose()
1367    }
1368
1369    /// Gets xml standalone, excluding quotes (`'` or `"`).
1370    ///
1371    /// Although according to the [grammar] standalone flag must appear after `"version"`
1372    /// and `"encoding"`, this method does not check that. The first occurrence of the
1373    /// attribute will be returned even if there are several. Also, method does not
1374    /// restrict symbols that can forming the value, so the returned flag name may not
1375    /// correspond to the grammar.
1376    ///
1377    /// # Examples
1378    ///
1379    /// ```
1380    /// use std::borrow::Cow;
1381    /// use quick_xml::Error;
1382    /// use quick_xml::events::{BytesDecl, BytesStart};
1383    ///
1384    /// // <?xml version='1.1'?>
1385    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1386    /// assert!(decl.standalone().is_none());
1387    ///
1388    /// // <?xml standalone='yes'?>
1389    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1390    /// match decl.standalone() {
1391    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1392    ///     _ => assert!(false),
1393    /// }
1394    ///
1395    /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1396    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1397    /// match decl.standalone() {
1398    ///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1399    ///     _ => assert!(false),
1400    /// }
1401    /// ```
1402    ///
1403    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1404    pub fn standalone(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1405        self.content
1406            .try_get_attribute("standalone")
1407            .map(|a| a.map(|a| a.value))
1408            .transpose()
1409    }
1410
1411    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1412    /// algorithm.
1413    ///
    /// If the encoding is not known, or the `encoding` key was not found, returns `None`.
    /// In case of a duplicated `encoding` key, the encoding corresponding to the first
    /// one is returned.
1417    #[cfg(feature = "encoding")]
1418    pub fn encoder(&self) -> Option<&'static Encoding> {
1419        self.encoding()
1420            .and_then(|e| e.ok())
1421            .and_then(|e| Encoding::for_label(&e))
1422    }
1423
1424    /// Converts the event into an owned event.
1425    pub fn into_owned(self) -> BytesDecl<'static> {
1426        BytesDecl {
1427            content: self.content.into_owned(),
1428        }
1429    }
1430
1431    /// Converts the event into a borrowed event.
1432    #[inline]
1433    pub fn borrow(&self) -> BytesDecl<'_> {
1434        BytesDecl {
1435            content: self.content.borrow(),
1436        }
1437    }
1438}
1439
1440impl<'a> Deref for BytesDecl<'a> {
1441    type Target = [u8];
1442
1443    fn deref(&self) -> &[u8] {
1444        &self.content
1445    }
1446}
1447
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version plus optional
    /// encoding and standalone values, drawn in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Reports the hint of a single `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1462
1463////////////////////////////////////////////////////////////////////////////////////////////////////
1464
1465/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
1466///
1467/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1468/// returns the content of this event between `&` and `;`:
1469///
1470/// ```
1471/// # use quick_xml::events::{BytesRef, Event};
1472/// # use quick_xml::reader::Reader;
1473/// # use pretty_assertions::assert_eq;
1474/// let mut reader = Reader::from_str(r#"&entity;"#);
1475/// let content = "entity";
1476/// let event = BytesRef::new(content);
1477///
1478/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
1479/// // deref coercion of &BytesRef to &[u8]
1480/// assert_eq!(&event as &[u8], content.as_bytes());
1481/// // AsRef<[u8]> for &T + deref coercion
1482/// assert_eq!(event.as_ref(), content.as_bytes());
1483/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesRef<'a> {
    /// Reference content between `&` and `;`, in the encoding described by `decoder`.
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event.
    decoder: Decoder,
}
1490
1491impl<'a> BytesRef<'a> {
1492    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
1493    #[inline]
1494    pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
1495        Self {
1496            content: Cow::Borrowed(content),
1497            decoder,
1498        }
1499    }
1500
1501    /// Creates a new `BytesRef` borrowing a slice.
1502    ///
1503    /// # Warning
1504    ///
1505    /// `name` must be a valid name.
1506    #[inline]
1507    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
1508        Self {
1509            content: str_cow_to_bytes(name),
1510            decoder: Decoder::utf8(),
1511        }
1512    }
1513
1514    /// Converts the event into an owned event.
1515    pub fn into_owned(self) -> BytesRef<'static> {
1516        BytesRef {
1517            content: Cow::Owned(self.content.into_owned()),
1518            decoder: self.decoder,
1519        }
1520    }
1521
1522    /// Extracts the inner `Cow` from the `BytesRef` event container.
1523    #[inline]
1524    pub fn into_inner(self) -> Cow<'a, [u8]> {
1525        self.content
1526    }
1527
1528    /// Converts the event into a borrowed event.
1529    #[inline]
1530    pub fn borrow(&self) -> BytesRef<'_> {
1531        BytesRef {
1532            content: Cow::Borrowed(&self.content),
1533            decoder: self.decoder,
1534        }
1535    }
1536
1537    /// Decodes the content of the event.
1538    ///
    /// This will allocate if the value contains any escape sequences or is in
    /// a non-UTF-8 encoding.
    ///
    /// This method does not normalize end-of-line characters as required by the [specification].
    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
1544    ///
1545    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
1546    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
1547        self.decoder.decode_cow(&self.content)
1548    }
1549
1550    /// Decodes the content of the XML 1.0 or HTML event.
1551    ///
1552    /// When this event produced by the reader, it uses the encoding information
1553    /// associated with that reader to interpret the raw bytes contained within
1554    /// this general reference event.
1555    ///
1556    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1557    /// is required.
1558    ///
1559    /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
1560    /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1561    ///
1562    /// This method also can be used to get HTML content, because rules the same.
1563    ///
1564    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1565    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1566    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1567    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1568        self.decoder.content(&self.content, normalize_xml10_eols)
1569    }
1570
1571    /// Decodes the content of the XML 1.1 event.
1572    ///
1573    /// When this event produced by the reader, it uses the encoding information
1574    /// associated with that reader to interpret the raw bytes contained within
1575    /// this general reference event.
1576    ///
1577    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1578    /// is required.
1579    ///
1580    /// Note, that this method should be used only if event represents XML 1.1 content,
1581    /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1582    ///
1583    /// To get HTML content use [`xml10_content()`](Self::xml10_content).
1584    ///
1585    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1586    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1587    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1588    pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1589        self.decoder.content(&self.content, normalize_xml11_eols)
1590    }
1591
1592    /// Alias for [`xml11_content()`](Self::xml11_content).
1593    #[inline]
1594    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1595        self.xml11_content()
1596    }
1597
1598    /// Alias for [`xml10_content()`](Self::xml10_content).
1599    #[inline]
1600    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1601        self.xml10_content()
1602    }
1603
1604    /// Returns `true` if the specified reference represents the character reference
1605    /// (`&#<number>;`).
1606    ///
1607    /// ```
1608    /// # use quick_xml::events::BytesRef;
1609    /// # use pretty_assertions::assert_eq;
1610    /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1611    /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1612    /// assert_eq!(BytesRef::new("lt"  ).is_char_ref(), false);
1613    /// ```
1614    pub fn is_char_ref(&self) -> bool {
1615        matches!(self.content.first(), Some(b'#'))
1616    }
1617
1618    /// If this reference represents character reference, then resolves it and
1619    /// returns the character, otherwise returns `None`.
1620    ///
1621    /// This method does not check if character is allowed for XML, in other words,
1622    /// well-formedness constraint [WFC: Legal Char] is not enforced.
1623    /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1624    ///
1625    /// ```
1626    /// # use quick_xml::events::BytesRef;
1627    /// # use pretty_assertions::assert_eq;
1628    /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1629    /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1630    /// assert_eq!(BytesRef::new("lt"  ).resolve_char_ref().unwrap(), None);
1631    /// ```
1632    ///
1633    /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1634    pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1635        if let Some(num) = self.decode()?.strip_prefix('#') {
1636            let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1637            return Ok(Some(ch));
1638        }
1639        Ok(None)
1640    }
1641}
1642
1643impl<'a> Debug for BytesRef<'a> {
1644    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1645        write!(f, "BytesRef {{ content: ")?;
1646        write_cow_string(f, &self.content)?;
1647        write!(f, " }}")
1648    }
1649}
1650
1651impl<'a> Deref for BytesRef<'a> {
1652    type Target = [u8];
1653
1654    fn deref(&self) -> &[u8] {
1655        &self.content
1656    }
1657}
1658
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Builds an event from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Same hint as the single `&str` consumed by `arbitrary`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1669
1670////////////////////////////////////////////////////////////////////////////////////////////////////
1671
/// Event emitted by [`Reader::read_event_into`].
///
/// Every variant except [`Eof`](Self::Eof) carries a payload tied to the
/// lifetime `'a`; [`into_owned`](Self::into_owned) detaches an event from it.
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesPI<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// General reference `&entity;` in the textual data. Can be either an entity
    /// reference, or a character reference.
    GeneralRef(BytesRef<'a>),
    /// End of XML document.
    Eof,
}
1702
1703impl<'a> Event<'a> {
1704    /// Converts the event to an owned version, untied to the lifetime of
1705    /// buffer used when reading but incurring a new, separate allocation.
1706    pub fn into_owned(self) -> Event<'static> {
1707        match self {
1708            Event::Start(e) => Event::Start(e.into_owned()),
1709            Event::End(e) => Event::End(e.into_owned()),
1710            Event::Empty(e) => Event::Empty(e.into_owned()),
1711            Event::Text(e) => Event::Text(e.into_owned()),
1712            Event::Comment(e) => Event::Comment(e.into_owned()),
1713            Event::CData(e) => Event::CData(e.into_owned()),
1714            Event::Decl(e) => Event::Decl(e.into_owned()),
1715            Event::PI(e) => Event::PI(e.into_owned()),
1716            Event::DocType(e) => Event::DocType(e.into_owned()),
1717            Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1718            Event::Eof => Event::Eof,
1719        }
1720    }
1721
1722    /// Converts the event into a borrowed event.
1723    #[inline]
1724    pub fn borrow(&self) -> Event<'_> {
1725        match self {
1726            Event::Start(e) => Event::Start(e.borrow()),
1727            Event::End(e) => Event::End(e.borrow()),
1728            Event::Empty(e) => Event::Empty(e.borrow()),
1729            Event::Text(e) => Event::Text(e.borrow()),
1730            Event::Comment(e) => Event::Comment(e.borrow()),
1731            Event::CData(e) => Event::CData(e.borrow()),
1732            Event::Decl(e) => Event::Decl(e.borrow()),
1733            Event::PI(e) => Event::PI(e.borrow()),
1734            Event::DocType(e) => Event::DocType(e.borrow()),
1735            Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1736            Event::Eof => Event::Eof,
1737        }
1738    }
1739}
1740
1741impl<'a> Deref for Event<'a> {
1742    type Target = [u8];
1743
1744    fn deref(&self) -> &[u8] {
1745        match *self {
1746            Event::Start(ref e) | Event::Empty(ref e) => e,
1747            Event::End(ref e) => e,
1748            Event::Text(ref e) => e,
1749            Event::Decl(ref e) => e,
1750            Event::PI(ref e) => e,
1751            Event::CData(ref e) => e,
1752            Event::Comment(ref e) => e,
1753            Event::DocType(ref e) => e,
1754            Event::GeneralRef(ref e) => e,
1755            Event::Eof => &[],
1756        }
1757    }
1758}
1759
1760impl<'a> AsRef<Event<'a>> for Event<'a> {
1761    fn as_ref(&self) -> &Event<'a> {
1762        self
1763    }
1764}
1765
1766////////////////////////////////////////////////////////////////////////////////////////////////////
1767
/// Converts anything convertible to `Cow<str>` into a `Cow<[u8]>` holding the
/// same bytes.
///
/// A borrowed string stays borrowed and an owned string stays owned (its
/// buffer is reused through `String::into_bytes`).
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(string) => Cow::Owned(string.into_bytes()),
        Cow::Borrowed(slice) => Cow::Borrowed(slice.as_bytes()),
    }
}
1775
/// Applies `trim` to the bytes in `value`, keeping the kind of ownership.
///
/// - Borrowed data: the result borrows whatever slice `trim` returns.
/// - Owned data: if `trim` returns a slice of the same length as the input,
///   the original buffer is returned as-is; otherwise the returned slice is
///   copied into a new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let buffer = match value {
        // Nothing to copy for borrowed data: just narrow the slice.
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(buffer) => buffer,
    };

    let kept = trim(&buffer);
    if kept.len() == buffer.len() {
        // Length unchanged: reuse the existing allocation.
        Cow::Owned(buffer)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1791
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A start tag created from a bare name reports that name and its length.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Renaming a tag that already has attributes changes the name and the
    /// total length accordingly.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        // Initially: name only, no attributes.
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        // After adding one attribute: `test` (4) + ` x="a"` (6).
        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        // After renaming: `g` (1) + ` x="a"` (6).
        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));
        start.clear_attributes();

        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}