quick_xml/events/mod.rs
1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52 escape, minimal_escape, normalize_xml10_eols, normalize_xml11_eols, parse_number,
53 partial_escape, EscapeError,
54};
55use crate::name::{LocalName, QName};
56use crate::utils::{self, name_len, trim_xml_end, trim_xml_start, write_cow_string};
57use attributes::{AttrError, Attribute, Attributes};
58
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<` and `>` or `/>`:
///
/// ```
/// # use quick_xml::events::{BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
/// <element a1 = 'val1' a2=\"val2\" />\
/// <element a1 = 'val1' a2=\"val2\" >"
/// );
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesStart::from_content(content, 7);
///
/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
/// // deref coercion of &BytesStart to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (name followed by the raw attributes), before
    /// any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// End of the element name; the name starts at the start of `buf`
    pub(crate) name_len: usize,
    /// Encoding used for `buf`
    decoder: Decoder,
}
100
101impl<'a> BytesStart<'a> {
102 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
103 #[inline]
104 pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
105 BytesStart {
106 buf: Cow::Borrowed(content),
107 name_len,
108 decoder,
109 }
110 }
111
112 /// Creates a new `BytesStart` from the given name.
113 ///
114 /// # Warning
115 ///
116 /// `name` must be a valid name.
117 #[inline]
118 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
119 let buf = str_cow_to_bytes(name);
120 BytesStart {
121 name_len: buf.len(),
122 buf,
123 decoder: Decoder::utf8(),
124 }
125 }
126
127 /// Creates a new `BytesStart` from the given content (name + attributes).
128 ///
129 /// # Warning
130 ///
131 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
132 /// must be correctly-formed attributes. Neither are checked, it is possible
133 /// to generate invalid XML if `content` or `name_len` are incorrect.
134 #[inline]
135 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
136 BytesStart {
137 buf: str_cow_to_bytes(content),
138 name_len,
139 decoder: Decoder::utf8(),
140 }
141 }
142
143 /// Converts the event into an owned event.
144 pub fn into_owned(self) -> BytesStart<'static> {
145 BytesStart {
146 buf: Cow::Owned(self.buf.into_owned()),
147 name_len: self.name_len,
148 decoder: self.decoder,
149 }
150 }
151
152 /// Converts the event into an owned event without taking ownership of Event
153 pub fn to_owned(&self) -> BytesStart<'static> {
154 BytesStart {
155 buf: Cow::Owned(self.buf.clone().into_owned()),
156 name_len: self.name_len,
157 decoder: self.decoder,
158 }
159 }
160
161 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
162 ///
163 /// # Example
164 ///
165 /// ```
166 /// use quick_xml::events::{BytesStart, Event};
167 /// # use quick_xml::writer::Writer;
168 /// # use quick_xml::Error;
169 ///
170 /// struct SomeStruct<'a> {
171 /// attrs: BytesStart<'a>,
172 /// // ...
173 /// }
174 /// # impl<'a> SomeStruct<'a> {
175 /// # fn example(&self) -> Result<(), Error> {
176 /// # let mut writer = Writer::new(Vec::new());
177 ///
178 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
179 /// // ...
180 /// writer.write_event(Event::End(self.attrs.to_end()))?;
181 /// # Ok(())
182 /// # }}
183 /// ```
184 ///
185 /// [`to_end`]: Self::to_end
186 pub fn borrow(&self) -> BytesStart<'_> {
187 BytesStart {
188 buf: Cow::Borrowed(&self.buf),
189 name_len: self.name_len,
190 decoder: self.decoder,
191 }
192 }
193
194 /// Creates new paired close tag
195 #[inline]
196 pub fn to_end(&self) -> BytesEnd<'_> {
197 BytesEnd::from(self.name())
198 }
199
200 /// Get the decoder, used to decode bytes, read by the reader which produces
201 /// this event, to the strings.
202 ///
203 /// When event was created manually, encoding is UTF-8.
204 ///
205 /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
206 /// defaults to UTF-8.
207 ///
208 /// [`encoding`]: ../index.html#encoding
209 #[inline]
210 pub const fn decoder(&self) -> Decoder {
211 self.decoder
212 }
213
214 /// Gets the undecoded raw tag name, as present in the input stream.
215 #[inline]
216 pub fn name(&self) -> QName<'_> {
217 QName(&self.buf[..self.name_len])
218 }
219
220 /// Gets the undecoded raw local tag name (excluding namespace) as present
221 /// in the input stream.
222 ///
223 /// All content up to and including the first `:` character is removed from the tag name.
224 #[inline]
225 pub fn local_name(&self) -> LocalName<'_> {
226 self.name().into()
227 }
228
229 /// Edit the name of the BytesStart in-place
230 ///
231 /// # Warning
232 ///
233 /// `name` must be a valid name.
234 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
235 let bytes = self.buf.to_mut();
236 bytes.splice(..self.name_len, name.iter().cloned());
237 self.name_len = name.len();
238 self
239 }
240}
241
242/// Attribute-related methods
243impl<'a> BytesStart<'a> {
244 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
245 ///
246 /// The yielded items must be convertible to [`Attribute`] using `Into`.
247 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
248 where
249 I: IntoIterator,
250 I::Item: Into<Attribute<'b>>,
251 {
252 self.extend_attributes(attributes);
253 self
254 }
255
256 /// Add additional attributes to this tag using an iterator.
257 ///
258 /// The yielded items must be convertible to [`Attribute`] using `Into`.
259 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
260 where
261 I: IntoIterator,
262 I::Item: Into<Attribute<'b>>,
263 {
264 for attr in attributes {
265 self.push_attribute(attr);
266 }
267 self
268 }
269
270 /// Adds an attribute to this element.
271 pub fn push_attribute<'b, A>(&mut self, attr: A)
272 where
273 A: Into<Attribute<'b>>,
274 {
275 self.buf.to_mut().push(b' ');
276 self.push_attr(attr.into());
277 }
278
279 /// Remove all attributes from the ByteStart
280 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
281 self.buf.to_mut().truncate(self.name_len);
282 self
283 }
284
285 /// Returns an iterator over the attributes of this tag.
286 pub fn attributes(&self) -> Attributes<'_> {
287 Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
288 }
289
290 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
291 pub fn html_attributes(&self) -> Attributes<'_> {
292 Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
293 }
294
295 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
296 /// including the whitespace after the tag name if there is any.
297 #[inline]
298 pub fn attributes_raw(&self) -> &[u8] {
299 &self.buf[self.name_len..]
300 }
301
302 /// Try to get an attribute
303 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
304 &'a self,
305 attr_name: N,
306 ) -> Result<Option<Attribute<'a>>, AttrError> {
307 for a in self.attributes().with_checks(false) {
308 let a = a?;
309 if a.key.as_ref() == attr_name.as_ref() {
310 return Ok(Some(a));
311 }
312 }
313 Ok(None)
314 }
315
316 /// Adds an attribute to this element.
317 pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
318 let bytes = self.buf.to_mut();
319 bytes.extend_from_slice(attr.key.as_ref());
320 bytes.extend_from_slice(b"=\"");
321 // FIXME: need to escape attribute content
322 bytes.extend_from_slice(attr.value.as_ref());
323 bytes.push(b'"');
324 }
325
326 /// Adds new line in existing element
327 pub(crate) fn push_newline(&mut self) {
328 self.buf.to_mut().push(b'\n');
329 }
330
331 /// Adds indentation bytes in existing element
332 pub(crate) fn push_indent(&mut self, indent: &[u8]) {
333 self.buf.to_mut().extend_from_slice(indent);
334 }
335}
336
337impl<'a> Debug for BytesStart<'a> {
338 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
339 write!(f, "BytesStart {{ buf: ")?;
340 write_cow_string(f, &self.buf)?;
341 write!(f, ", name_len: {} }}", self.name_len)
342 }
343}
344
345impl<'a> Deref for BytesStart<'a> {
346 type Target = [u8];
347
348 fn deref(&self) -> &[u8] {
349 &self.buf
350 }
351}
352
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag whose name is a non-empty alphanumeric string,
    /// followed by arbitrary `(&str, &str)` attributes. Any other name is
    /// rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let is_usable = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !is_usable {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        Ok(Self::new(name).with_attributes(attrs))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
369
370////////////////////////////////////////////////////////////////////////////////////////////////////
371
/// Closing tag data (`Event::End`): `</name>`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `</` and `>`.
///
/// Note that the inner text will not contain the `>` character:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw bytes of the event; the whole content is exposed as the tag name
    name: Cow<'a, [u8]>,
}
408
409impl<'a> BytesEnd<'a> {
410 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
411 #[inline]
412 pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
413 BytesEnd { name }
414 }
415
416 /// Creates a new `BytesEnd` borrowing a slice.
417 ///
418 /// # Warning
419 ///
420 /// `name` must be a valid name.
421 #[inline]
422 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
423 Self::wrap(str_cow_to_bytes(name))
424 }
425
426 /// Converts the event into an owned event.
427 pub fn into_owned(self) -> BytesEnd<'static> {
428 BytesEnd {
429 name: Cow::Owned(self.name.into_owned()),
430 }
431 }
432
433 /// Converts the event into a borrowed event.
434 #[inline]
435 pub fn borrow(&self) -> BytesEnd<'_> {
436 BytesEnd {
437 name: Cow::Borrowed(&self.name),
438 }
439 }
440
441 /// Gets the undecoded raw tag name, as present in the input stream.
442 #[inline]
443 pub fn name(&self) -> QName<'_> {
444 QName(&self.name)
445 }
446
447 /// Gets the undecoded raw local tag name (excluding namespace) as present
448 /// in the input stream.
449 ///
450 /// All content up to and including the first `:` character is removed from the tag name.
451 #[inline]
452 pub fn local_name(&self) -> LocalName<'_> {
453 self.name().into()
454 }
455}
456
457impl<'a> Debug for BytesEnd<'a> {
458 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
459 write!(f, "BytesEnd {{ name: ")?;
460 write_cow_string(f, &self.name)?;
461 write!(f, " }}")
462 }
463}
464
465impl<'a> Deref for BytesEnd<'a> {
466 type Target = [u8];
467
468 fn deref(&self) -> &[u8] {
469 &self.name
470 }
471}
472
473impl<'a> From<QName<'a>> for BytesEnd<'a> {
474 #[inline]
475 fn from(name: QName<'a>) -> Self {
476 Self::wrap(name.into_inner().into())
477 }
478}
479
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag from an arbitrary string, used as-is.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
489
490////////////////////////////////////////////////////////////////////////////////////////////////////
491
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally data is stored in escaped form.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event. In case of comment this is everything
/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
/// (i.e. in case of DTD the first character is never space):
///
/// ```
/// # use quick_xml::events::{BytesText, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
/// <!DOCTYPE comment or text >\
/// comment or text \
/// <!--comment or text -->"
/// );
/// let content = "comment or text ";
/// let event = BytesText::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
/// // deref coercion of &BytesText to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when the event comes from the reader and should be in the
    /// document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
532
533impl<'a> BytesText<'a> {
534 /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
535 #[inline]
536 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
537 Self {
538 content: content.into(),
539 decoder,
540 }
541 }
542
543 /// Creates a new `BytesText` from an escaped string.
544 #[inline]
545 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
546 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
547 }
548
549 /// Creates a new `BytesText` from a string. The string is expected not to
550 /// be escaped.
551 #[inline]
552 pub fn new(content: &'a str) -> Self {
553 Self::from_escaped(escape(content))
554 }
555
556 /// Ensures that all data is owned to extend the object's lifetime if
557 /// necessary.
558 #[inline]
559 pub fn into_owned(self) -> BytesText<'static> {
560 BytesText {
561 content: self.content.into_owned().into(),
562 decoder: self.decoder,
563 }
564 }
565
566 /// Extracts the inner `Cow` from the `BytesText` event container.
567 #[inline]
568 pub fn into_inner(self) -> Cow<'a, [u8]> {
569 self.content
570 }
571
572 /// Converts the event into a borrowed event.
573 #[inline]
574 pub fn borrow(&self) -> BytesText<'_> {
575 BytesText {
576 content: Cow::Borrowed(&self.content),
577 decoder: self.decoder,
578 }
579 }
580
581 /// Decodes the content of the event.
582 ///
583 /// This will allocate if the value contains any escape sequences or in
584 /// non-UTF-8 encoding.
585 ///
586 /// This method does not normalizes end-of-line characters as required by [specification].
587 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
588 ///
589 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
590 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
591 self.decoder.decode_cow(&self.content)
592 }
593
594 /// Decodes the content of the XML 1.0 or HTML event.
595 ///
596 /// When this event produced by the reader, it uses the encoding information
597 /// associated with that reader to interpret the raw bytes contained within
598 /// this text event.
599 ///
600 /// This will allocate if the value contains any escape sequences or in non-UTF-8
601 /// encoding, or EOL normalization is required.
602 ///
603 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
604 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
605 ///
606 /// This method also can be used to get HTML content, because rules the same.
607 ///
608 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
609 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
610 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
611 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
612 self.decoder.content(&self.content, normalize_xml10_eols)
613 }
614
615 /// Decodes the content of the XML 1.1 event.
616 ///
617 /// When this event produced by the reader, it uses the encoding information
618 /// associated with that reader to interpret the raw bytes contained within
619 /// this text event.
620 ///
621 /// This will allocate if the value contains any escape sequences or in non-UTF-8
622 /// encoding, or EOL normalization is required.
623 ///
624 /// Note, that this method should be used only if event represents XML 1.1 content,
625 /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
626 ///
627 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
628 ///
629 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
630 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
631 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
632 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
633 self.decoder.content(&self.content, normalize_xml11_eols)
634 }
635
636 /// Alias for [`xml11_content()`](Self::xml11_content).
637 #[inline]
638 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
639 self.xml11_content()
640 }
641
642 /// Alias for [`xml10_content()`](Self::xml10_content).
643 #[inline]
644 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
645 self.xml10_content()
646 }
647
648 /// Removes leading XML whitespace bytes from text content.
649 ///
650 /// Returns `true` if content is empty after that
651 pub fn inplace_trim_start(&mut self) -> bool {
652 self.content = trim_cow(
653 replace(&mut self.content, Cow::Borrowed(b"")),
654 trim_xml_start,
655 );
656 self.content.is_empty()
657 }
658
659 /// Removes trailing XML whitespace bytes from text content.
660 ///
661 /// Returns `true` if content is empty after that
662 pub fn inplace_trim_end(&mut self) -> bool {
663 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
664 self.content.is_empty()
665 }
666}
667
668impl<'a> Debug for BytesText<'a> {
669 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
670 write!(f, "BytesText {{ content: ")?;
671 write_cow_string(f, &self.content)?;
672 write!(f, " }}")
673 }
674}
675
676impl<'a> Deref for BytesText<'a> {
677 type Target = [u8];
678
679 fn deref(&self) -> &[u8] {
680 &self.content
681 }
682}
683
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary string that consists only of
    /// alphanumeric characters (the empty string is accepted); anything else
    /// is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().any(|ch| !ch.is_alphanumeric()) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(text))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
698
699////////////////////////////////////////////////////////////////////////////////////////////////////
700
/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
/// [convert](Self::escape) it to [`BytesText`].
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<![CDATA[` and `]]>`.
///
/// Note that the inner text will not contain the `]]>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesCData, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
/// let content = " CDATA section ";
/// let event = BytesCData::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
/// // deref coercion of &BytesCData to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Raw content of the event, stored in the encoding described by `decoder`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
729
730impl<'a> BytesCData<'a> {
731 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
732 #[inline]
733 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
734 Self {
735 content: content.into(),
736 decoder,
737 }
738 }
739
740 /// Creates a new `BytesCData` from a string.
741 ///
742 /// # Warning
743 ///
744 /// `content` must not contain the `]]>` sequence. You can use
745 /// [`BytesCData::escaped`] to escape the content instead.
746 #[inline]
747 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
748 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
749 }
750
751 /// Creates an iterator of `BytesCData` from a string.
752 ///
753 /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
754 /// sections, splitting the `]]` and `>` characters, because the CDATA closing
755 /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
756 /// for each of those sections.
757 ///
758 /// # Examples
759 ///
760 /// ```
761 /// # use quick_xml::events::BytesCData;
762 /// # use pretty_assertions::assert_eq;
763 /// let content = "";
764 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
765 /// assert_eq!(cdata, &[BytesCData::new("")]);
766 ///
767 /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
768 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
769 /// assert_eq!(cdata, &[
770 /// BytesCData::new("Certain tokens like ]]"),
771 /// BytesCData::new("> can be difficult and <invalid>"),
772 /// ]);
773 ///
774 /// let content = "foo]]>bar]]>baz]]>quux";
775 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
776 /// assert_eq!(cdata, &[
777 /// BytesCData::new("foo]]"),
778 /// BytesCData::new(">bar]]"),
779 /// BytesCData::new(">baz]]"),
780 /// BytesCData::new(">quux"),
781 /// ]);
782 /// ```
783 #[inline]
784 pub fn escaped(content: &'a str) -> CDataIterator<'a> {
785 CDataIterator {
786 inner: utils::CDataIterator::new(content),
787 }
788 }
789
790 /// Ensures that all data is owned to extend the object's lifetime if
791 /// necessary.
792 #[inline]
793 pub fn into_owned(self) -> BytesCData<'static> {
794 BytesCData {
795 content: self.content.into_owned().into(),
796 decoder: self.decoder,
797 }
798 }
799
800 /// Extracts the inner `Cow` from the `BytesCData` event container.
801 #[inline]
802 pub fn into_inner(self) -> Cow<'a, [u8]> {
803 self.content
804 }
805
806 /// Converts the event into a borrowed event.
807 #[inline]
808 pub fn borrow(&self) -> BytesCData<'_> {
809 BytesCData {
810 content: Cow::Borrowed(&self.content),
811 decoder: self.decoder,
812 }
813 }
814
815 /// Converts this CDATA content to an escaped version, that can be written
816 /// as an usual text in XML.
817 ///
818 /// This function performs following replacements:
819 ///
820 /// | Character | Replacement
821 /// |-----------|------------
822 /// | `<` | `<`
823 /// | `>` | `>`
824 /// | `&` | `&`
825 /// | `'` | `'`
826 /// | `"` | `"`
827 pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
828 let decoded = self.decode()?;
829 Ok(BytesText::wrap(
830 match escape(decoded) {
831 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
832 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
833 },
834 Decoder::utf8(),
835 ))
836 }
837
838 /// Converts this CDATA content to an escaped version, that can be written
839 /// as an usual text in XML.
840 ///
841 /// In XML text content, it is allowed (though not recommended) to leave
842 /// the quote special characters `"` and `'` unescaped.
843 ///
844 /// This function performs following replacements:
845 ///
846 /// | Character | Replacement
847 /// |-----------|------------
848 /// | `<` | `<`
849 /// | `>` | `>`
850 /// | `&` | `&`
851 pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
852 let decoded = self.decode()?;
853 Ok(BytesText::wrap(
854 match partial_escape(decoded) {
855 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
856 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
857 },
858 Decoder::utf8(),
859 ))
860 }
861
862 /// Converts this CDATA content to an escaped version, that can be written
863 /// as an usual text in XML. This method escapes only those characters that
864 /// must be escaped according to the [specification].
865 ///
866 /// This function performs following replacements:
867 ///
868 /// | Character | Replacement
869 /// |-----------|------------
870 /// | `<` | `<`
871 /// | `&` | `&`
872 ///
873 /// [specification]: https://www.w3.org/TR/xml11/#syntax
874 pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
875 let decoded = self.decode()?;
876 Ok(BytesText::wrap(
877 match minimal_escape(decoded) {
878 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
879 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
880 },
881 Decoder::utf8(),
882 ))
883 }
884
885 /// Decodes the raw input byte content of the CDATA section into a string,
886 /// without performing XML entity escaping.
887 ///
888 /// When this event produced by the XML reader, it uses the encoding information
889 /// associated with that reader to interpret the raw bytes contained within this
890 /// CDATA event.
891 ///
892 /// This method does not normalizes end-of-line characters as required by [specification].
893 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
894 ///
895 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
896 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
897 self.decoder.decode_cow(&self.content)
898 }
899
900 /// Decodes the raw input byte content of the CDATA section of the XML 1.0 or
901 /// HTML event into a string.
902 ///
903 /// When this event produced by the reader, it uses the encoding information
904 /// associated with that reader to interpret the raw bytes contained within
905 /// this CDATA event.
906 ///
907 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
908 /// is required.
909 ///
910 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
911 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
912 ///
913 /// This method also can be used to get HTML content, because rules the same.
914 ///
915 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
916 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
917 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
918 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
919 self.decoder.content(&self.content, normalize_xml10_eols)
920 }
921
922 /// Decodes the raw input byte content of the CDATA section of the XML 1.1 event
923 /// into a string.
924 ///
925 /// When this event produced by the reader, it uses the encoding information
926 /// associated with that reader to interpret the raw bytes contained within
927 /// this CDATA event.
928 ///
929 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
930 /// is required.
931 ///
932 /// Note, that this method should be used only if event represents XML 1.1 content,
933 /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
934 ///
935 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
936 ///
937 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
938 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
939 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
940 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
941 self.decoder.content(&self.content, normalize_xml11_eols)
942 }
943
944 /// Alias for [`xml11_content()`](Self::xml11_content).
945 #[inline]
946 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
947 self.xml11_content()
948 }
949
950 /// Alias for [`xml10_content()`](Self::xml10_content).
951 #[inline]
952 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
953 self.xml10_content()
954 }
955}
956
957impl<'a> Debug for BytesCData<'a> {
958 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
959 write!(f, "BytesCData {{ content: ")?;
960 write_cow_string(f, &self.content)?;
961 write!(f, " }}")
962 }
963}
964
965impl<'a> Deref for BytesCData<'a> {
966 type Target = [u8];
967
968 fn deref(&self) -> &[u8] {
969 &self.content
970 }
971}
972
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds an event from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// Reports the same hint as the single `&str` consumed by `arbitrary()`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
982
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
/// Each item is a [`BytesCData`] event.
#[derive(Debug, Clone)]
pub struct CDataIterator<'a> {
    /// Underlying iterator from `utils`; every string slice it yields is
    /// wrapped into a `BytesCData` by `next()`.
    inner: utils::CDataIterator<'a>,
}
990
991impl<'a> Iterator for CDataIterator<'a> {
992 type Item = BytesCData<'a>;
993
994 fn next(&mut self) -> Option<BytesCData<'a>> {
995 self.inner
996 .next()
997 .map(|slice| BytesCData::wrap(slice.as_bytes(), Decoder::utf8()))
998 }
999}
1000
// Marker impl: promises that after `next()` has returned `None` it keeps
// returning `None`. `next()` only forwards to `utils::CDataIterator`, so this
// relies on that iterator's behaviour — NOTE(review): confirm it is fused.
impl FusedIterator for CDataIterator<'_> {}
1002
1003////////////////////////////////////////////////////////////////////////////////////////////////////
1004
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text will not contain the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesPI, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
/// let content = "processing instruction >:-<~ ";
/// let event = BytesPI::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
/// // deref coercion of &BytesPI to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// The text between `<?` and `?>`, kept in a `BytesStart` so that the
    /// target is read through its `name()` and the rest through its
    /// `attributes_raw()` / `attributes()`.
    content: BytesStart<'a>,
}
1032
1033impl<'a> BytesPI<'a> {
1034 /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
1035 #[inline]
1036 pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
1037 Self {
1038 content: BytesStart::wrap(content, target_len, decoder),
1039 }
1040 }
1041
1042 /// Creates a new `BytesPI` from a string.
1043 ///
1044 /// # Warning
1045 ///
1046 /// `content` must not contain the `?>` sequence.
1047 #[inline]
1048 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1049 let buf = str_cow_to_bytes(content);
1050 let name_len = name_len(&buf);
1051 Self {
1052 content: BytesStart {
1053 buf,
1054 name_len,
1055 decoder: Decoder::utf8(),
1056 },
1057 }
1058 }
1059
1060 /// Ensures that all data is owned to extend the object's lifetime if
1061 /// necessary.
1062 #[inline]
1063 pub fn into_owned(self) -> BytesPI<'static> {
1064 BytesPI {
1065 content: self.content.into_owned(),
1066 }
1067 }
1068
1069 /// Extracts the inner `Cow` from the `BytesPI` event container.
1070 #[inline]
1071 pub fn into_inner(self) -> Cow<'a, [u8]> {
1072 self.content.buf
1073 }
1074
1075 /// Converts the event into a borrowed event.
1076 #[inline]
1077 pub fn borrow(&self) -> BytesPI<'_> {
1078 BytesPI {
1079 content: self.content.borrow(),
1080 }
1081 }
1082
1083 /// A target used to identify the application to which the instruction is directed.
1084 ///
1085 /// # Example
1086 ///
1087 /// ```
1088 /// # use pretty_assertions::assert_eq;
1089 /// use quick_xml::events::BytesPI;
1090 ///
1091 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1092 /// assert_eq!(instruction.target(), b"xml-stylesheet");
1093 /// ```
1094 #[inline]
1095 pub fn target(&self) -> &[u8] {
1096 self.content.name().0
1097 }
1098
1099 /// Content of the processing instruction. Contains everything between target
1100 /// name and the end of the instruction. A direct consequence is that the first
1101 /// character is always a space character.
1102 ///
1103 /// # Example
1104 ///
1105 /// ```
1106 /// # use pretty_assertions::assert_eq;
1107 /// use quick_xml::events::BytesPI;
1108 ///
1109 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1110 /// assert_eq!(instruction.content(), br#" href="style.css""#);
1111 /// ```
1112 #[inline]
1113 pub fn content(&self) -> &[u8] {
1114 self.content.attributes_raw()
1115 }
1116
1117 /// A view of the processing instructions' content as a list of key-value pairs.
1118 ///
1119 /// Key-value pairs are used in some processing instructions, for example in
1120 /// `<?xml-stylesheet?>`.
1121 ///
1122 /// Returned iterator does not validate attribute values as may required by
1123 /// target's rules. For example, it doesn't check that substring `?>` is not
1124 /// present in the attribute value. That shouldn't be the problem when event
1125 /// is produced by the reader, because reader detects end of processing instruction
1126 /// by the first `?>` sequence, as required by the specification, and therefore
1127 /// this sequence cannot appear inside it.
1128 ///
1129 /// # Example
1130 ///
1131 /// ```
1132 /// # use pretty_assertions::assert_eq;
1133 /// use std::borrow::Cow;
1134 /// use quick_xml::events::attributes::Attribute;
1135 /// use quick_xml::events::BytesPI;
1136 /// use quick_xml::name::QName;
1137 ///
1138 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1139 /// for attr in instruction.attributes() {
1140 /// assert_eq!(attr, Ok(Attribute {
1141 /// key: QName(b"href"),
1142 /// value: Cow::Borrowed(b"style.css"),
1143 /// }));
1144 /// }
1145 /// ```
1146 #[inline]
1147 pub fn attributes(&self) -> Attributes<'_> {
1148 self.content.attributes()
1149 }
1150}
1151
1152impl<'a> Debug for BytesPI<'a> {
1153 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1154 write!(f, "BytesPI {{ content: ")?;
1155 write_cow_string(f, &self.content.buf)?;
1156 write!(f, " }}")
1157 }
1158}
1159
1160impl<'a> Deref for BytesPI<'a> {
1161 type Target = [u8];
1162
1163 fn deref(&self) -> &[u8] {
1164 &self.content
1165 }
1166}
1167
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds a processing instruction from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// Reports the same hint as the single `&str` consumed by `arbitrary()`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1177
1178////////////////////////////////////////////////////////////////////////////////////////////////////
1179
/// An XML declaration (`Event::Decl`).
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text will not contain the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
/// let content = "xml version = '1.0' ";
/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
///
/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
/// // deref coercion of &BytesDecl to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// The declaration text, kept in a `BytesStart` so that `version`,
    /// `encoding` and `standalone` can be looked up through its attribute API.
    content: BytesStart<'a>,
}
1207
1208impl<'a> BytesDecl<'a> {
1209 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1210 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1211 /// attribute.
1212 ///
1213 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1214 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1215 /// the double quote character is not allowed in any of the attribute values.
1216 pub fn new(
1217 version: &str,
1218 encoding: Option<&str>,
1219 standalone: Option<&str>,
1220 ) -> BytesDecl<'static> {
1221 // Compute length of the buffer based on supplied attributes
1222 // ' encoding=""' => 12
1223 let encoding_attr_len = if let Some(xs) = encoding {
1224 12 + xs.len()
1225 } else {
1226 0
1227 };
1228 // ' standalone=""' => 14
1229 let standalone_attr_len = if let Some(xs) = standalone {
1230 14 + xs.len()
1231 } else {
1232 0
1233 };
1234 // 'xml version=""' => 14
1235 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1236
1237 buf.push_str("xml version=\"");
1238 buf.push_str(version);
1239
1240 if let Some(encoding_val) = encoding {
1241 buf.push_str("\" encoding=\"");
1242 buf.push_str(encoding_val);
1243 }
1244
1245 if let Some(standalone_val) = standalone {
1246 buf.push_str("\" standalone=\"");
1247 buf.push_str(standalone_val);
1248 }
1249 buf.push('"');
1250
1251 BytesDecl {
1252 content: BytesStart::from_content(buf, 3),
1253 }
1254 }
1255
1256 /// Creates a `BytesDecl` from a `BytesStart`
1257 pub const fn from_start(start: BytesStart<'a>) -> Self {
1258 Self { content: start }
1259 }
1260
1261 /// Gets xml version, excluding quotes (`'` or `"`).
1262 ///
1263 /// According to the [grammar], the version *must* be the first thing in the declaration.
1264 /// This method tries to extract the first thing in the declaration and return it.
1265 /// In case of multiple attributes value of the first one is returned.
1266 ///
1267 /// If version is missed in the declaration, or the first thing is not a version,
1268 /// [`IllFormedError::MissingDeclVersion`] will be returned.
1269 ///
1270 /// # Examples
1271 ///
1272 /// ```
1273 /// use quick_xml::errors::{Error, IllFormedError};
1274 /// use quick_xml::events::{BytesDecl, BytesStart};
1275 ///
1276 /// // <?xml version='1.1'?>
1277 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1278 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1279 ///
1280 /// // <?xml version='1.0' version='1.1'?>
1281 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1282 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1283 ///
1284 /// // <?xml encoding='utf-8'?>
1285 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1286 /// match decl.version() {
1287 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1288 /// _ => assert!(false),
1289 /// }
1290 ///
1291 /// // <?xml encoding='utf-8' version='1.1'?>
1292 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1293 /// match decl.version() {
1294 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1295 /// _ => assert!(false),
1296 /// }
1297 ///
1298 /// // <?xml?>
1299 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1300 /// match decl.version() {
1301 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1302 /// _ => assert!(false),
1303 /// }
1304 /// ```
1305 ///
1306 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1307 pub fn version(&self) -> Result<Cow<'_, [u8]>, Error> {
1308 // The version *must* be the first thing in the declaration.
1309 match self.content.attributes().with_checks(false).next() {
1310 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1311 // first attribute was not "version"
1312 Some(Ok(a)) => {
1313 let found = from_utf8(a.key.as_ref())
1314 .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1315 .to_string();
1316 Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1317 found,
1318 ))))
1319 }
1320 // error parsing attributes
1321 Some(Err(e)) => Err(e.into()),
1322 // no attributes
1323 None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1324 }
1325 }
1326
1327 /// Gets xml encoding, excluding quotes (`'` or `"`).
1328 ///
1329 /// Although according to the [grammar] encoding must appear before `"standalone"`
1330 /// and after `"version"`, this method does not check that. The first occurrence
1331 /// of the attribute will be returned even if there are several. Also, method does
1332 /// not restrict symbols that can forming the encoding, so the returned encoding
1333 /// name may not correspond to the grammar.
1334 ///
1335 /// # Examples
1336 ///
1337 /// ```
1338 /// use std::borrow::Cow;
1339 /// use quick_xml::Error;
1340 /// use quick_xml::events::{BytesDecl, BytesStart};
1341 ///
1342 /// // <?xml version='1.1'?>
1343 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1344 /// assert!(decl.encoding().is_none());
1345 ///
1346 /// // <?xml encoding='utf-8'?>
1347 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1348 /// match decl.encoding() {
1349 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1350 /// _ => assert!(false),
1351 /// }
1352 ///
1353 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1354 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1355 /// match decl.encoding() {
1356 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1357 /// _ => assert!(false),
1358 /// }
1359 /// ```
1360 ///
1361 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1362 pub fn encoding(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1363 self.content
1364 .try_get_attribute("encoding")
1365 .map(|a| a.map(|a| a.value))
1366 .transpose()
1367 }
1368
1369 /// Gets xml standalone, excluding quotes (`'` or `"`).
1370 ///
1371 /// Although according to the [grammar] standalone flag must appear after `"version"`
1372 /// and `"encoding"`, this method does not check that. The first occurrence of the
1373 /// attribute will be returned even if there are several. Also, method does not
1374 /// restrict symbols that can forming the value, so the returned flag name may not
1375 /// correspond to the grammar.
1376 ///
1377 /// # Examples
1378 ///
1379 /// ```
1380 /// use std::borrow::Cow;
1381 /// use quick_xml::Error;
1382 /// use quick_xml::events::{BytesDecl, BytesStart};
1383 ///
1384 /// // <?xml version='1.1'?>
1385 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1386 /// assert!(decl.standalone().is_none());
1387 ///
1388 /// // <?xml standalone='yes'?>
1389 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1390 /// match decl.standalone() {
1391 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1392 /// _ => assert!(false),
1393 /// }
1394 ///
1395 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1396 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1397 /// match decl.standalone() {
1398 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1399 /// _ => assert!(false),
1400 /// }
1401 /// ```
1402 ///
1403 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1404 pub fn standalone(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1405 self.content
1406 .try_get_attribute("standalone")
1407 .map(|a| a.map(|a| a.value))
1408 .transpose()
1409 }
1410
1411 /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1412 /// algorithm.
1413 ///
1414 /// If encoding in not known, or `encoding` key was not found, returns `None`.
1415 /// In case of duplicated `encoding` key, encoding, corresponding to the first
1416 /// one, is returned.
1417 #[cfg(feature = "encoding")]
1418 pub fn encoder(&self) -> Option<&'static Encoding> {
1419 self.encoding()
1420 .and_then(|e| e.ok())
1421 .and_then(|e| Encoding::for_label(&e))
1422 }
1423
1424 /// Converts the event into an owned event.
1425 pub fn into_owned(self) -> BytesDecl<'static> {
1426 BytesDecl {
1427 content: self.content.into_owned(),
1428 }
1429 }
1430
1431 /// Converts the event into a borrowed event.
1432 #[inline]
1433 pub fn borrow(&self) -> BytesDecl<'_> {
1434 BytesDecl {
1435 content: self.content.borrow(),
1436 }
1437 }
1438}
1439
1440impl<'a> Deref for BytesDecl<'a> {
1441 type Target = [u8];
1442
1443 fn deref(&self) -> &[u8] {
1444 &self.content
1445 }
1446}
1447
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version string plus optional
    /// encoding and standalone strings.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // The values are drawn in parameter order: version, encoding, standalone.
        let version = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        let encoding = <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?;
        let standalone = <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Reports the hint of a single `&str`.
    // NOTE(review): `arbitrary()` also draws two `Option<&str>` values that
    // are not reflected in this hint.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1462
1463////////////////////////////////////////////////////////////////////////////////////////////////////
1464
/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `&` and `;`:
///
/// ```
/// # use quick_xml::events::{BytesRef, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"&entity;"#);
/// let content = "entity";
/// let event = BytesRef::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
/// // deref coercion of &BytesRef to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesRef<'a> {
    /// Raw bytes of the reference, without the surrounding `&` and `;`.
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event.
    decoder: Decoder,
}
1490
1491impl<'a> BytesRef<'a> {
1492 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
1493 #[inline]
1494 pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
1495 Self {
1496 content: Cow::Borrowed(content),
1497 decoder,
1498 }
1499 }
1500
1501 /// Creates a new `BytesRef` borrowing a slice.
1502 ///
1503 /// # Warning
1504 ///
1505 /// `name` must be a valid name.
1506 #[inline]
1507 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
1508 Self {
1509 content: str_cow_to_bytes(name),
1510 decoder: Decoder::utf8(),
1511 }
1512 }
1513
1514 /// Converts the event into an owned event.
1515 pub fn into_owned(self) -> BytesRef<'static> {
1516 BytesRef {
1517 content: Cow::Owned(self.content.into_owned()),
1518 decoder: self.decoder,
1519 }
1520 }
1521
1522 /// Extracts the inner `Cow` from the `BytesRef` event container.
1523 #[inline]
1524 pub fn into_inner(self) -> Cow<'a, [u8]> {
1525 self.content
1526 }
1527
1528 /// Converts the event into a borrowed event.
1529 #[inline]
1530 pub fn borrow(&self) -> BytesRef<'_> {
1531 BytesRef {
1532 content: Cow::Borrowed(&self.content),
1533 decoder: self.decoder,
1534 }
1535 }
1536
1537 /// Decodes the content of the event.
1538 ///
1539 /// This will allocate if the value contains any escape sequences or in
1540 /// non-UTF-8 encoding.
1541 ///
1542 /// This method does not normalizes end-of-line characters as required by [specification].
1543 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
1544 ///
1545 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
1546 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
1547 self.decoder.decode_cow(&self.content)
1548 }
1549
1550 /// Decodes the content of the XML 1.0 or HTML event.
1551 ///
1552 /// When this event produced by the reader, it uses the encoding information
1553 /// associated with that reader to interpret the raw bytes contained within
1554 /// this general reference event.
1555 ///
1556 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1557 /// is required.
1558 ///
1559 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
1560 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1561 ///
1562 /// This method also can be used to get HTML content, because rules the same.
1563 ///
1564 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1565 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1566 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1567 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1568 self.decoder.content(&self.content, normalize_xml10_eols)
1569 }
1570
1571 /// Decodes the content of the XML 1.1 event.
1572 ///
1573 /// When this event produced by the reader, it uses the encoding information
1574 /// associated with that reader to interpret the raw bytes contained within
1575 /// this general reference event.
1576 ///
1577 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1578 /// is required.
1579 ///
1580 /// Note, that this method should be used only if event represents XML 1.1 content,
1581 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1582 ///
1583 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
1584 ///
1585 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1586 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1587 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1588 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1589 self.decoder.content(&self.content, normalize_xml11_eols)
1590 }
1591
1592 /// Alias for [`xml11_content()`](Self::xml11_content).
1593 #[inline]
1594 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1595 self.xml11_content()
1596 }
1597
1598 /// Alias for [`xml10_content()`](Self::xml10_content).
1599 #[inline]
1600 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1601 self.xml10_content()
1602 }
1603
1604 /// Returns `true` if the specified reference represents the character reference
1605 /// (`&#<number>;`).
1606 ///
1607 /// ```
1608 /// # use quick_xml::events::BytesRef;
1609 /// # use pretty_assertions::assert_eq;
1610 /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1611 /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1612 /// assert_eq!(BytesRef::new("lt" ).is_char_ref(), false);
1613 /// ```
1614 pub fn is_char_ref(&self) -> bool {
1615 matches!(self.content.first(), Some(b'#'))
1616 }
1617
1618 /// If this reference represents character reference, then resolves it and
1619 /// returns the character, otherwise returns `None`.
1620 ///
1621 /// This method does not check if character is allowed for XML, in other words,
1622 /// well-formedness constraint [WFC: Legal Char] is not enforced.
1623 /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1624 ///
1625 /// ```
1626 /// # use quick_xml::events::BytesRef;
1627 /// # use pretty_assertions::assert_eq;
1628 /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1629 /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1630 /// assert_eq!(BytesRef::new("lt" ).resolve_char_ref().unwrap(), None);
1631 /// ```
1632 ///
1633 /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1634 pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1635 if let Some(num) = self.decode()?.strip_prefix('#') {
1636 let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1637 return Ok(Some(ch));
1638 }
1639 Ok(None)
1640 }
1641}
1642
1643impl<'a> Debug for BytesRef<'a> {
1644 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1645 write!(f, "BytesRef {{ content: ")?;
1646 write_cow_string(f, &self.content)?;
1647 write!(f, " }}")
1648 }
1649}
1650
1651impl<'a> Deref for BytesRef<'a> {
1652 type Target = [u8];
1653
1654 fn deref(&self) -> &[u8] {
1655 &self.content
1656 }
1657}
1658
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Builds a reference from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reports the same hint as the single `&str` consumed by `arbitrary()`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1669
1670////////////////////////////////////////////////////////////////////////////////////////////////////
1671
/// Event emitted by [`Reader::read_event_into`].
///
/// Every variant except [`Eof`](Self::Eof) carries the bytes of the
/// corresponding piece of markup; the `Deref<Target = [u8]>` implementation of
/// `Event` gives access to them and yields an empty slice for `Eof`.
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesPI<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// General reference `&entity;` in the textual data. Can be either an entity
    /// reference, or a character reference.
    GeneralRef(BytesRef<'a>),
    /// End of XML document.
    Eof,
}
1702
1703impl<'a> Event<'a> {
1704 /// Converts the event to an owned version, untied to the lifetime of
1705 /// buffer used when reading but incurring a new, separate allocation.
1706 pub fn into_owned(self) -> Event<'static> {
1707 match self {
1708 Event::Start(e) => Event::Start(e.into_owned()),
1709 Event::End(e) => Event::End(e.into_owned()),
1710 Event::Empty(e) => Event::Empty(e.into_owned()),
1711 Event::Text(e) => Event::Text(e.into_owned()),
1712 Event::Comment(e) => Event::Comment(e.into_owned()),
1713 Event::CData(e) => Event::CData(e.into_owned()),
1714 Event::Decl(e) => Event::Decl(e.into_owned()),
1715 Event::PI(e) => Event::PI(e.into_owned()),
1716 Event::DocType(e) => Event::DocType(e.into_owned()),
1717 Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1718 Event::Eof => Event::Eof,
1719 }
1720 }
1721
1722 /// Converts the event into a borrowed event.
1723 #[inline]
1724 pub fn borrow(&self) -> Event<'_> {
1725 match self {
1726 Event::Start(e) => Event::Start(e.borrow()),
1727 Event::End(e) => Event::End(e.borrow()),
1728 Event::Empty(e) => Event::Empty(e.borrow()),
1729 Event::Text(e) => Event::Text(e.borrow()),
1730 Event::Comment(e) => Event::Comment(e.borrow()),
1731 Event::CData(e) => Event::CData(e.borrow()),
1732 Event::Decl(e) => Event::Decl(e.borrow()),
1733 Event::PI(e) => Event::PI(e.borrow()),
1734 Event::DocType(e) => Event::DocType(e.borrow()),
1735 Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1736 Event::Eof => Event::Eof,
1737 }
1738 }
1739}
1740
1741impl<'a> Deref for Event<'a> {
1742 type Target = [u8];
1743
1744 fn deref(&self) -> &[u8] {
1745 match *self {
1746 Event::Start(ref e) | Event::Empty(ref e) => e,
1747 Event::End(ref e) => e,
1748 Event::Text(ref e) => e,
1749 Event::Decl(ref e) => e,
1750 Event::PI(ref e) => e,
1751 Event::CData(ref e) => e,
1752 Event::Comment(ref e) => e,
1753 Event::DocType(ref e) => e,
1754 Event::GeneralRef(ref e) => e,
1755 Event::Eof => &[],
1756 }
1757 }
1758}
1759
/// Identity conversion: lets an `Event` be passed wherever an
/// `AsRef<Event<'a>>` is accepted.
impl<'a> AsRef<Event<'a>> for Event<'a> {
    fn as_ref(&self) -> &Event<'a> {
        self
    }
}
1765
1766////////////////////////////////////////////////////////////////////////////////////////////////////
1767
/// Reinterprets a string-like value as bytes without copying: a borrowed
/// string yields a borrowed byte slice, an owned `String` is converted into
/// its `Vec<u8>`.
#[inline]
fn str_cow_to_bytes<'a, C>(content: C) -> Cow<'a, [u8]>
where
    C: Into<Cow<'a, str>>,
{
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(string) => Cow::Owned(string.into_bytes()),
        Cow::Borrowed(slice) => Cow::Borrowed(slice.as_bytes()),
    }
}
1775
/// Applies `trim` to the bytes held by `value`, keeping the kind of the `Cow`:
/// a borrowed value is simply re-sliced, while an owned value is replaced by a
/// copy of the trimmed part only when `trim` actually shortened it.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let owned = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(vec) => vec,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        // Nothing was removed, so the original allocation is reused
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1791
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.len(), 4);
    }

    /// Renaming a tag keeps the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.len(), 4);
        assert_eq!(start.attributes_raw(), b"");

        // `test` + ` x="a"`
        start.push_attribute(("x", "a"));
        assert_eq!(start.attributes_raw(), b" x=\"a\"");
        assert_eq!(start.len(), 10);

        // `g` + ` x="a"`
        start.set_name(b"g");
        assert_eq!(start.name(), QName(b"g"));
        assert_eq!(start.len(), 7);
    }

    /// Clearing the attributes leaves only the name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        for _ in 0..2 {
            start.push_attribute(("x", "y\"z"));
        }
        start.clear_attributes();
        assert!(start.attributes().next().is_none());
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.len(), 4);
    }
}