quick_xml/events/mod.rs
//! Defines zero-copy XML events used throughout this library.
//!
//! An XML event often represents part of an XML element.
//! Events occur both during reading and writing and are
//! usually used with the stream-oriented API.
//!
//! For example, the XML element
//! ```xml
//! <name attr="value">Inner text</name>
//! ```
//! consists of the three events `Start`, `Text` and `End`.
//! Events can also represent other parts of an XML document, like the
//! XML declaration. Each event usually contains further information,
//! like the tag name, the attributes or the inner text.
//!
//! See [`Event`] for a list of all possible events.
//!
//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
//! and [`Reader::read_event_into`]. You must listen
//! for the different types of events you are interested in.
//!
//! See [`Reader`] for further information.
//!
//! # Writing
//! When writing an XML document, you must create each XML element
//! by constructing the events it consists of and passing them to the writer
//! sequentially.
//!
//! See [`Writer`] for further information.
//!
//! [`Reader::read_event`]: crate::reader::Reader::read_event
//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
//! [`Reader`]: crate::reader::Reader
//! [`Writer`]: crate::writer::Writer
//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52 escape, minimal_escape, normalize_xml10_eols, normalize_xml11_eols, parse_number,
53 partial_escape, EscapeError,
54};
55use crate::name::{LocalName, QName};
56use crate::utils::{self, name_len, trim_xml_end, trim_xml_start, write_cow_string};
57use attributes::{AttrError, Attribute, Attributes};
58
59/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
60///
61/// The name can be accessed using the [`name`] or [`local_name`] methods.
62/// An iterator over the attributes is returned by the [`attributes`] method.
63///
64/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
65/// returns the content of this event between `<` and `>` or `/>`:
66///
67/// ```
68/// # use quick_xml::events::{BytesStart, Event};
69/// # use quick_xml::reader::Reader;
70/// # use pretty_assertions::assert_eq;
71/// // Remember, that \ at the end of string literal strips
72/// // all space characters to the first non-space character
73/// let mut reader = Reader::from_str("\
74/// <element a1 = 'val1' a2=\"val2\" />\
75/// <element a1 = 'val1' a2=\"val2\" >"
76/// );
77/// let content = "element a1 = 'val1' a2=\"val2\" ";
78/// let event = BytesStart::from_content(content, 7);
79///
80/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
81/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
82/// // deref coercion of &BytesStart to &[u8]
83/// assert_eq!(&event as &[u8], content.as_bytes());
84/// // AsRef<[u8]> for &T + deref coercion
85/// assert_eq!(event.as_ref(), content.as_bytes());
86/// ```
87///
88/// [`name`]: Self::name
89/// [`local_name`]: Self::local_name
90/// [`attributes`]: Self::attributes
91#[derive(Clone, Eq, PartialEq)]
92pub struct BytesStart<'a> {
93 /// content of the element, before any utf8 conversion
94 pub(crate) buf: Cow<'a, [u8]>,
95 /// end of the element name, the name starts at that the start of `buf`
96 pub(crate) name_len: usize,
97 /// Encoding used for `buf`
98 decoder: Decoder,
99}
100
101impl<'a> BytesStart<'a> {
102 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
103 #[inline]
104 pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
105 BytesStart {
106 buf: Cow::Borrowed(content),
107 name_len,
108 decoder,
109 }
110 }
111
112 /// Creates a new `BytesStart` from the given name.
113 ///
114 /// # Warning
115 ///
116 /// `name` must be a valid name.
117 #[inline]
118 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
119 let buf = str_cow_to_bytes(name);
120 BytesStart {
121 name_len: buf.len(),
122 buf,
123 decoder: Decoder::utf8(),
124 }
125 }
126
127 /// Creates a new `BytesStart` from the given content (name + attributes).
128 ///
129 /// # Warning
130 ///
131 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
132 /// must be correctly-formed attributes. Neither are checked, it is possible
133 /// to generate invalid XML if `content` or `name_len` are incorrect.
134 #[inline]
135 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
136 BytesStart {
137 buf: str_cow_to_bytes(content),
138 name_len,
139 decoder: Decoder::utf8(),
140 }
141 }
142
143 /// Converts the event into an owned event.
144 pub fn into_owned(self) -> BytesStart<'static> {
145 BytesStart {
146 buf: Cow::Owned(self.buf.into_owned()),
147 name_len: self.name_len,
148 decoder: self.decoder,
149 }
150 }
151
152 /// Converts the event into an owned event without taking ownership of Event
153 pub fn to_owned(&self) -> BytesStart<'static> {
154 BytesStart {
155 buf: Cow::Owned(self.buf.clone().into_owned()),
156 name_len: self.name_len,
157 decoder: self.decoder,
158 }
159 }
160
161 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
162 ///
163 /// # Example
164 ///
165 /// ```
166 /// use quick_xml::events::{BytesStart, Event};
167 /// # use quick_xml::writer::Writer;
168 /// # use quick_xml::Error;
169 ///
170 /// struct SomeStruct<'a> {
171 /// attrs: BytesStart<'a>,
172 /// // ...
173 /// }
174 /// # impl<'a> SomeStruct<'a> {
175 /// # fn example(&self) -> Result<(), Error> {
176 /// # let mut writer = Writer::new(Vec::new());
177 ///
178 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
179 /// // ...
180 /// writer.write_event(Event::End(self.attrs.to_end()))?;
181 /// # Ok(())
182 /// # }}
183 /// ```
184 ///
185 /// [`to_end`]: Self::to_end
186 pub fn borrow(&self) -> BytesStart<'_> {
187 BytesStart {
188 buf: Cow::Borrowed(&self.buf),
189 name_len: self.name_len,
190 decoder: self.decoder,
191 }
192 }
193
194 /// Creates new paired close tag
195 #[inline]
196 pub fn to_end(&self) -> BytesEnd<'_> {
197 BytesEnd::from(self.name())
198 }
199
200 /// Get the decoder, used to decode bytes, read by the reader which produces
201 /// this event, to the strings.
202 ///
203 /// When event was created manually, encoding is UTF-8.
204 ///
205 /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
206 /// defaults to UTF-8.
207 ///
208 /// [`encoding`]: ../index.html#encoding
209 #[inline]
210 pub const fn decoder(&self) -> Decoder {
211 self.decoder
212 }
213
214 /// Gets the undecoded raw tag name, as present in the input stream.
215 #[inline]
216 pub fn name(&self) -> QName<'_> {
217 QName(&self.buf[..self.name_len])
218 }
219
220 /// Gets the undecoded raw local tag name (excluding namespace) as present
221 /// in the input stream.
222 ///
223 /// All content up to and including the first `:` character is removed from the tag name.
224 #[inline]
225 pub fn local_name(&self) -> LocalName<'_> {
226 self.name().into()
227 }
228
229 /// Edit the name of the BytesStart in-place
230 ///
231 /// # Warning
232 ///
233 /// `name` must be a valid name.
234 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
235 let bytes = self.buf.to_mut();
236 bytes.splice(..self.name_len, name.iter().cloned());
237 self.name_len = name.len();
238 self
239 }
240}
241
242/// Attribute-related methods
243impl<'a> BytesStart<'a> {
244 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
245 ///
246 /// The yielded items must be convertible to [`Attribute`] using `Into`.
247 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
248 where
249 I: IntoIterator,
250 I::Item: Into<Attribute<'b>>,
251 {
252 self.extend_attributes(attributes);
253 self
254 }
255
256 /// Add additional attributes to this tag using an iterator.
257 ///
258 /// The yielded items must be convertible to [`Attribute`] using `Into`.
259 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
260 where
261 I: IntoIterator,
262 I::Item: Into<Attribute<'b>>,
263 {
264 for attr in attributes {
265 self.push_attribute(attr);
266 }
267 self
268 }
269
270 /// Adds an attribute to this element.
271 pub fn push_attribute<'b, A>(&mut self, attr: A)
272 where
273 A: Into<Attribute<'b>>,
274 {
275 self.buf.to_mut().push(b' ');
276 self.push_attr(attr.into());
277 }
278
279 /// Remove all attributes from the ByteStart
280 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
281 self.buf.to_mut().truncate(self.name_len);
282 self
283 }
284
285 /// Returns an iterator over the attributes of this tag.
286 pub fn attributes(&self) -> Attributes<'_> {
287 Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
288 }
289
290 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
291 pub fn html_attributes(&self) -> Attributes<'_> {
292 Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
293 }
294
295 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
296 /// including the whitespace after the tag name if there is any.
297 #[inline]
298 pub fn attributes_raw(&self) -> &[u8] {
299 &self.buf[self.name_len..]
300 }
301
302 /// Try to get an attribute
303 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
304 &'a self,
305 attr_name: N,
306 ) -> Result<Option<Attribute<'a>>, AttrError> {
307 for a in self.attributes().with_checks(false) {
308 let a = a?;
309 if a.key.as_ref() == attr_name.as_ref() {
310 return Ok(Some(a));
311 }
312 }
313 Ok(None)
314 }
315
316 /// Adds an attribute to this element.
317 pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
318 let bytes = self.buf.to_mut();
319 bytes.extend_from_slice(attr.key.as_ref());
320 bytes.extend_from_slice(b"=\"");
321 // FIXME: need to escape attribute content
322 bytes.extend_from_slice(attr.value.as_ref());
323 bytes.push(b'"');
324 }
325
326 /// Adds new line in existing element
327 pub(crate) fn push_newline(&mut self) {
328 self.buf.to_mut().push(b'\n');
329 }
330
331 /// Adds indentation bytes in existing element
332 pub(crate) fn push_indent(&mut self, indent: &[u8]) {
333 self.buf.to_mut().extend_from_slice(indent);
334 }
335}
336
337impl<'a> Debug for BytesStart<'a> {
338 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
339 write!(f, "BytesStart {{ buf: ")?;
340 write_cow_string(f, &self.buf)?;
341 write!(f, ", name_len: {} }}", self.name_len)
342 }
343}
344
345impl<'a> Deref for BytesStart<'a> {
346 type Target = [u8];
347
348 fn deref(&self) -> &[u8] {
349 &self.buf
350 }
351}
352
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a tag with a non-empty, purely alphanumeric name and an
    /// arbitrary list of attributes.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let is_valid_name = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !is_valid_name {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        Ok(Self::new(name).with_attributes(attrs))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
369
370////////////////////////////////////////////////////////////////////////////////////////////////////
371
/// Data of a closing tag (`Event::End`): `</name>`.
///
/// Use the [`name`] or [`local_name`] methods to get the tag name.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `</` and `>`.
///
/// Note that the content never contains a `>` character:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded bytes of the tag name
    name: Cow<'a, [u8]>,
}
408
409impl<'a> BytesEnd<'a> {
410 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
411 #[inline]
412 pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
413 BytesEnd { name }
414 }
415
416 /// Creates a new `BytesEnd` borrowing a slice.
417 ///
418 /// # Warning
419 ///
420 /// `name` must be a valid name.
421 #[inline]
422 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
423 Self::wrap(str_cow_to_bytes(name))
424 }
425
426 /// Converts the event into an owned event.
427 pub fn into_owned(self) -> BytesEnd<'static> {
428 BytesEnd {
429 name: Cow::Owned(self.name.into_owned()),
430 }
431 }
432
433 /// Converts the event into a borrowed event.
434 #[inline]
435 pub fn borrow(&self) -> BytesEnd<'_> {
436 BytesEnd {
437 name: Cow::Borrowed(&self.name),
438 }
439 }
440
441 /// Gets the undecoded raw tag name, as present in the input stream.
442 #[inline]
443 pub fn name(&self) -> QName<'_> {
444 QName(&self.name)
445 }
446
447 /// Gets the undecoded raw local tag name (excluding namespace) as present
448 /// in the input stream.
449 ///
450 /// All content up to and including the first `:` character is removed from the tag name.
451 #[inline]
452 pub fn local_name(&self) -> LocalName<'_> {
453 self.name().into()
454 }
455}
456
457impl<'a> Debug for BytesEnd<'a> {
458 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
459 write!(f, "BytesEnd {{ name: ")?;
460 write_cow_string(f, &self.name)?;
461 write!(f, " }}")
462 }
463}
464
465impl<'a> Deref for BytesEnd<'a> {
466 type Target = [u8];
467
468 fn deref(&self) -> &[u8] {
469 &self.name
470 }
471}
472
473impl<'a> From<QName<'a>> for BytesEnd<'a> {
474 #[inline]
475 fn from(name: QName<'a>) -> Self {
476 Self::wrap(name.into_inner().into())
477 }
478}
479
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string (not validated).
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
489
490////////////////////////////////////////////////////////////////////////////////////////////////////
491
492/// Data from various events (most notably, `Event::Text`).
493///
494/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
495/// returns the content of this event. In case of comment this is everything
496/// between `<!--` and `-->` and the text of comment may not contain `-->` inside
497/// (if [`Config::check_comments`] is set to `true`).
498/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
499/// (i.e. in case of DTD the first character is never space):
500///
501/// ```
502/// # use quick_xml::events::{BytesText, Event};
503/// # use quick_xml::reader::Reader;
504/// # use pretty_assertions::assert_eq;
505/// // Remember, that \ at the end of string literal strips
506/// // all space characters to the first non-space character
507/// let mut reader = Reader::from_str("\
508/// <!DOCTYPE comment or text >\
509/// comment or text \
510/// <!--comment or text -->"
511/// );
512/// let content = "comment or text ";
513/// let event = BytesText::new(content);
514///
515/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
516/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
517/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
518/// // deref coercion of &BytesText to &[u8]
519/// assert_eq!(&event as &[u8], content.as_bytes());
520/// // AsRef<[u8]> for &T + deref coercion
521/// assert_eq!(event.as_ref(), content.as_bytes());
522/// ```
523///
524/// [`Config::check_comments`]: crate::reader::Config::check_comments
525#[derive(Clone, Eq, PartialEq)]
526pub struct BytesText<'a> {
527 /// Escaped then encoded content of the event. Content is encoded in the XML
528 /// document encoding when event comes from the reader and should be in the
529 /// document encoding when event passed to the writer
530 content: Cow<'a, [u8]>,
531 /// Encoding in which the `content` is stored inside the event
532 decoder: Decoder,
533}
534
535impl<'a> BytesText<'a> {
536 /// Creates a new `BytesText` from a raw byte sequence as it appeared in th XML
537 /// source in the specified encoding.
538 #[inline]
539 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
540 Self {
541 content: content.into(),
542 decoder,
543 }
544 }
545
546 /// Creates a new `BytesText` from a raw string as it appeared in the XML source.
547 ///
548 /// # Warning
549 ///
550 /// `content` is not checked to not contain markup or entity references. Be warned
551 /// that writing such event may result to invalid XML if your content contains not
552 /// defined entity references or invalid XML markup.
553 ///
554 /// `content` may have any EOLs, they will be normalized when using [`xml_content()`] getters.
555 ///
556 /// [`xml_content()`]: Self::xml_content
557 #[inline]
558 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
559 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
560 }
561
562 /// Creates a new `BytesText` from a string.
563 ///
564 /// # Warning
565 ///
566 /// `content` will be escaped using the [`escape`] function, but that may change
567 /// in the future, because events produced by the reader never contains `&` or `<`,
568 /// and escaping of `>`, `"` and `'` is not required. If you want to preserve exact
569 /// content, use [`from_escaped()`] method, but be warned that writing such event
570 /// may result to invalid XML if your content contains not defined entity references
571 /// or invalid XML markup.
572 ///
573 /// `content` may have any EOLs, they will be normalized when using [`xml_content()`] getters.
574 ///
575 /// [`escape`]: crate::escape::escape
576 /// [`from_escaped()`]: Self::from_escaped
577 /// [`xml_content()`]: Self::xml_content
578 #[inline]
579 pub fn new(content: &'a str) -> Self {
580 Self::from_escaped(escape(content))
581 }
582
583 /// Ensures that all data is owned to extend the object's lifetime if
584 /// necessary.
585 #[inline]
586 pub fn into_owned(self) -> BytesText<'static> {
587 BytesText {
588 content: self.content.into_owned().into(),
589 decoder: self.decoder,
590 }
591 }
592
593 /// Extracts the inner `Cow` from the `BytesText` event container.
594 #[inline]
595 pub fn into_inner(self) -> Cow<'a, [u8]> {
596 self.content
597 }
598
599 /// Converts the event into a borrowed event.
600 #[inline]
601 pub fn borrow(&self) -> BytesText<'_> {
602 BytesText {
603 content: Cow::Borrowed(&self.content),
604 decoder: self.decoder,
605 }
606 }
607
608 /// Decodes the content of the event.
609 ///
610 /// This will allocate if the value is encoded in non-UTF-8 encoding.
611 ///
612 /// This method does not normalizes end-of-line characters as required by [specification].
613 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
614 ///
615 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
616 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
617 self.decoder.decode_cow(&self.content)
618 }
619
620 /// Decodes the content of the XML 1.0 or HTML event.
621 ///
622 /// When this event produced by the reader, it uses the encoding information
623 /// associated with that reader to interpret the raw bytes contained within
624 /// this text event.
625 ///
626 /// This will allocate if the value is encoded in non-UTF-8 encoding, or EOL normalization is required.
627 ///
628 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
629 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
630 ///
631 /// This method also can be used to get HTML content, because rules the same.
632 ///
633 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
634 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
635 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
636 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
637 self.decoder.content(&self.content, normalize_xml10_eols)
638 }
639
640 /// Decodes the content of the XML 1.1 event.
641 ///
642 /// When this event produced by the reader, it uses the encoding information
643 /// associated with that reader to interpret the raw bytes contained within
644 /// this text event.
645 ///
646 /// This will allocate if the value is encoded in non-UTF-8 encoding, or EOL normalization is required.
647 ///
648 /// Note, that this method should be used only if event represents XML 1.1 content,
649 /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
650 ///
651 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
652 ///
653 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
654 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
655 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
656 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
657 self.decoder.content(&self.content, normalize_xml11_eols)
658 }
659
660 /// Alias for [`xml11_content()`](Self::xml11_content).
661 #[inline]
662 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
663 self.xml11_content()
664 }
665
666 /// Alias for [`xml10_content()`](Self::xml10_content).
667 #[inline]
668 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
669 self.xml10_content()
670 }
671
672 /// Removes leading XML whitespace bytes from text content.
673 ///
674 /// Returns `true` if content is empty after that
675 pub fn inplace_trim_start(&mut self) -> bool {
676 self.content = trim_cow(
677 replace(&mut self.content, Cow::Borrowed(b"")),
678 trim_xml_start,
679 );
680 self.content.is_empty()
681 }
682
683 /// Removes trailing XML whitespace bytes from text content.
684 ///
685 /// Returns `true` if content is empty after that
686 pub fn inplace_trim_end(&mut self) -> bool {
687 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
688 self.content.is_empty()
689 }
690}
691
692impl<'a> Debug for BytesText<'a> {
693 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
694 write!(f, "BytesText {{ content: ")?;
695 write_cow_string(f, &self.content)?;
696 write!(f, " }}")
697 }
698}
699
700impl<'a> Deref for BytesText<'a> {
701 type Target = [u8];
702
703 fn deref(&self) -> &[u8] {
704 &self.content
705 }
706}
707
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary string that consists only of
    /// alphanumeric characters (possibly empty).
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().all(char::is_alphanumeric) {
            Ok(Self::new(text))
        } else {
            Err(arbitrary::Error::IncorrectFormat)
        }
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
722
723////////////////////////////////////////////////////////////////////////////////////////////////////
724
725/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
726/// [convert](Self::escape) it to [`BytesText`].
727///
728/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
729/// returns the content of this event between `<![CDATA[` and `]]>`.
730///
731/// Note, that inner text will not contain `]]>` sequence inside:
732///
733/// ```
734/// # use quick_xml::events::{BytesCData, Event};
735/// # use quick_xml::reader::Reader;
736/// # use pretty_assertions::assert_eq;
737/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
738/// let content = " CDATA section ";
739/// let event = BytesCData::new(content);
740///
741/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
742/// // deref coercion of &BytesCData to &[u8]
743/// assert_eq!(&event as &[u8], content.as_bytes());
744/// // AsRef<[u8]> for &T + deref coercion
745/// assert_eq!(event.as_ref(), content.as_bytes());
746/// ```
747#[derive(Clone, Eq, PartialEq)]
748pub struct BytesCData<'a> {
749 content: Cow<'a, [u8]>,
750 /// Encoding in which the `content` is stored inside the event
751 decoder: Decoder,
752}
753
754impl<'a> BytesCData<'a> {
755 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
756 #[inline]
757 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
758 Self {
759 content: content.into(),
760 decoder,
761 }
762 }
763
764 /// Creates a new `BytesCData` from a string.
765 ///
766 /// # Warning
767 ///
768 /// `content` must not contain the `]]>` sequence. You can use
769 /// [`BytesCData::escaped`] to escape the content instead.
770 #[inline]
771 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
772 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
773 }
774
775 /// Creates an iterator of `BytesCData` from a string.
776 ///
777 /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
778 /// sections, splitting the `]]` and `>` characters, because the CDATA closing
779 /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
780 /// for each of those sections.
781 ///
782 /// # Examples
783 ///
784 /// ```
785 /// # use quick_xml::events::BytesCData;
786 /// # use pretty_assertions::assert_eq;
787 /// let content = "";
788 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
789 /// assert_eq!(cdata, &[BytesCData::new("")]);
790 ///
791 /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
792 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
793 /// assert_eq!(cdata, &[
794 /// BytesCData::new("Certain tokens like ]]"),
795 /// BytesCData::new("> can be difficult and <invalid>"),
796 /// ]);
797 ///
798 /// let content = "foo]]>bar]]>baz]]>quux";
799 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
800 /// assert_eq!(cdata, &[
801 /// BytesCData::new("foo]]"),
802 /// BytesCData::new(">bar]]"),
803 /// BytesCData::new(">baz]]"),
804 /// BytesCData::new(">quux"),
805 /// ]);
806 /// ```
807 #[inline]
808 pub fn escaped(content: &'a str) -> CDataIterator<'a> {
809 CDataIterator {
810 inner: utils::CDataIterator::new(content),
811 }
812 }
813
814 /// Ensures that all data is owned to extend the object's lifetime if
815 /// necessary.
816 #[inline]
817 pub fn into_owned(self) -> BytesCData<'static> {
818 BytesCData {
819 content: self.content.into_owned().into(),
820 decoder: self.decoder,
821 }
822 }
823
824 /// Extracts the inner `Cow` from the `BytesCData` event container.
825 #[inline]
826 pub fn into_inner(self) -> Cow<'a, [u8]> {
827 self.content
828 }
829
830 /// Converts the event into a borrowed event.
831 #[inline]
832 pub fn borrow(&self) -> BytesCData<'_> {
833 BytesCData {
834 content: Cow::Borrowed(&self.content),
835 decoder: self.decoder,
836 }
837 }
838
839 /// Converts this CDATA content to an escaped version, that can be written
840 /// as an usual text in XML.
841 ///
842 /// This function performs following replacements:
843 ///
844 /// | Character | Replacement
845 /// |-----------|------------
846 /// | `<` | `<`
847 /// | `>` | `>`
848 /// | `&` | `&`
849 /// | `'` | `'`
850 /// | `"` | `"`
851 pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
852 let decoded = self.decode()?;
853 Ok(BytesText::wrap(
854 match escape(decoded) {
855 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
856 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
857 },
858 Decoder::utf8(),
859 ))
860 }
861
862 /// Converts this CDATA content to an escaped version, that can be written
863 /// as an usual text in XML.
864 ///
865 /// In XML text content, it is allowed (though not recommended) to leave
866 /// the quote special characters `"` and `'` unescaped.
867 ///
868 /// This function performs following replacements:
869 ///
870 /// | Character | Replacement
871 /// |-----------|------------
872 /// | `<` | `<`
873 /// | `>` | `>`
874 /// | `&` | `&`
875 pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
876 let decoded = self.decode()?;
877 Ok(BytesText::wrap(
878 match partial_escape(decoded) {
879 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
880 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
881 },
882 Decoder::utf8(),
883 ))
884 }
885
886 /// Converts this CDATA content to an escaped version, that can be written
887 /// as an usual text in XML. This method escapes only those characters that
888 /// must be escaped according to the [specification].
889 ///
890 /// This function performs following replacements:
891 ///
892 /// | Character | Replacement
893 /// |-----------|------------
894 /// | `<` | `<`
895 /// | `&` | `&`
896 ///
897 /// [specification]: https://www.w3.org/TR/xml11/#syntax
898 pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
899 let decoded = self.decode()?;
900 Ok(BytesText::wrap(
901 match minimal_escape(decoded) {
902 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
903 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
904 },
905 Decoder::utf8(),
906 ))
907 }
908
909 /// Decodes the raw input byte content of the CDATA section into a string,
910 /// without performing XML entity escaping.
911 ///
912 /// When this event produced by the XML reader, it uses the encoding information
913 /// associated with that reader to interpret the raw bytes contained within this
914 /// CDATA event.
915 ///
916 /// This method does not normalizes end-of-line characters as required by [specification].
917 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
918 ///
919 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
920 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
921 self.decoder.decode_cow(&self.content)
922 }
923
924 /// Decodes the raw input byte content of the CDATA section of the XML 1.0 or
925 /// HTML event into a string.
926 ///
927 /// When this event produced by the reader, it uses the encoding information
928 /// associated with that reader to interpret the raw bytes contained within
929 /// this CDATA event.
930 ///
931 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
932 /// is required.
933 ///
934 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
935 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
936 ///
937 /// This method also can be used to get HTML content, because rules the same.
938 ///
939 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
940 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
941 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
942 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
943 self.decoder.content(&self.content, normalize_xml10_eols)
944 }
945
946 /// Decodes the raw input byte content of the CDATA section of the XML 1.1 event
947 /// into a string.
948 ///
/// When this event is produced by the reader, it uses the encoding information
/// associated with that reader to interpret the raw bytes contained within
/// this CDATA event.
///
/// This will allocate if the value is in a non-UTF-8 encoding, or if EOL
/// normalization is required.
///
/// Note that this method should be used only if the event represents XML 1.1 content,
/// because the rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differ.
958 ///
959 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
960 ///
961 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
962 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
963 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
964 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
965 self.decoder.content(&self.content, normalize_xml11_eols)
966 }
967
968 /// Alias for [`xml11_content()`](Self::xml11_content).
969 #[inline]
970 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
971 self.xml11_content()
972 }
973
974 /// Alias for [`xml10_content()`](Self::xml10_content).
975 #[inline]
976 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
977 self.xml10_content()
978 }
979}
980
981impl<'a> Debug for BytesCData<'a> {
982 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
983 write!(f, "BytesCData {{ content: ")?;
984 write_cow_string(f, &self.content)?;
985 write!(f, " }}")
986 }
987}
988
989impl<'a> Deref for BytesCData<'a> {
990 type Target = [u8];
991
992 fn deref(&self) -> &[u8] {
993 &self.content
994 }
995}
996
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds the event from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&'a str as arbitrary::Arbitrary<'a>>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// Same hint as for the single `&str` consumed by `arbitrary`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1006
1007/// Iterator over `CDATA` sections in a string.
1008///
1009/// This iterator is created by the [`BytesCData::escaped`] method.
1010#[derive(Debug, Clone)]
1011pub struct CDataIterator<'a> {
1012 inner: utils::CDataIterator<'a>,
1013}
1014
1015impl<'a> Iterator for CDataIterator<'a> {
1016 type Item = BytesCData<'a>;
1017
1018 fn next(&mut self) -> Option<BytesCData<'a>> {
1019 self.inner
1020 .next()
1021 .map(|slice| BytesCData::wrap(slice.as_bytes(), Decoder::utf8()))
1022 }
1023}
1024
1025impl FusedIterator for CDataIterator<'_> {}
1026
1027////////////////////////////////////////////////////////////////////////////////////////////////////
1028
1029/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
1030///
1031/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1032/// returns the content of this event between `<?` and `?>`.
1033///
1034/// Note, that inner text will not contain `?>` sequence inside:
1035///
1036/// ```
1037/// # use quick_xml::events::{BytesPI, Event};
1038/// # use quick_xml::reader::Reader;
1039/// # use pretty_assertions::assert_eq;
1040/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
1041/// let content = "processing instruction >:-<~ ";
1042/// let event = BytesPI::new(content);
1043///
1044/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
1045/// // deref coercion of &BytesPI to &[u8]
1046/// assert_eq!(&event as &[u8], content.as_bytes());
1047/// // AsRef<[u8]> for &T + deref coercion
1048/// assert_eq!(event.as_ref(), content.as_bytes());
1049/// ```
1050///
1051/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
1052#[derive(Clone, Eq, PartialEq)]
1053pub struct BytesPI<'a> {
1054 content: BytesStart<'a>,
1055}
1056
1057impl<'a> BytesPI<'a> {
1058 /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
1059 #[inline]
1060 pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
1061 Self {
1062 content: BytesStart::wrap(content, target_len, decoder),
1063 }
1064 }
1065
1066 /// Creates a new `BytesPI` from a string.
1067 ///
1068 /// # Warning
1069 ///
1070 /// `content` must not contain the `?>` sequence.
1071 #[inline]
1072 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1073 let buf = str_cow_to_bytes(content);
1074 let name_len = name_len(&buf);
1075 Self {
1076 content: BytesStart {
1077 buf,
1078 name_len,
1079 decoder: Decoder::utf8(),
1080 },
1081 }
1082 }
1083
1084 /// Ensures that all data is owned to extend the object's lifetime if
1085 /// necessary.
1086 #[inline]
1087 pub fn into_owned(self) -> BytesPI<'static> {
1088 BytesPI {
1089 content: self.content.into_owned(),
1090 }
1091 }
1092
1093 /// Extracts the inner `Cow` from the `BytesPI` event container.
1094 #[inline]
1095 pub fn into_inner(self) -> Cow<'a, [u8]> {
1096 self.content.buf
1097 }
1098
1099 /// Converts the event into a borrowed event.
1100 #[inline]
1101 pub fn borrow(&self) -> BytesPI<'_> {
1102 BytesPI {
1103 content: self.content.borrow(),
1104 }
1105 }
1106
1107 /// A target used to identify the application to which the instruction is directed.
1108 ///
1109 /// # Example
1110 ///
1111 /// ```
1112 /// # use pretty_assertions::assert_eq;
1113 /// use quick_xml::events::BytesPI;
1114 ///
1115 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1116 /// assert_eq!(instruction.target(), b"xml-stylesheet");
1117 /// ```
1118 #[inline]
1119 pub fn target(&self) -> &[u8] {
1120 self.content.name().0
1121 }
1122
1123 /// Content of the processing instruction. Contains everything between target
1124 /// name and the end of the instruction. A direct consequence is that the first
1125 /// character is always a space character.
1126 ///
1127 /// # Example
1128 ///
1129 /// ```
1130 /// # use pretty_assertions::assert_eq;
1131 /// use quick_xml::events::BytesPI;
1132 ///
1133 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1134 /// assert_eq!(instruction.content(), br#" href="style.css""#);
1135 /// ```
1136 #[inline]
1137 pub fn content(&self) -> &[u8] {
1138 self.content.attributes_raw()
1139 }
1140
1141 /// A view of the processing instructions' content as a list of key-value pairs.
1142 ///
1143 /// Key-value pairs are used in some processing instructions, for example in
1144 /// `<?xml-stylesheet?>`.
1145 ///
1146 /// Returned iterator does not validate attribute values as may required by
1147 /// target's rules. For example, it doesn't check that substring `?>` is not
1148 /// present in the attribute value. That shouldn't be the problem when event
1149 /// is produced by the reader, because reader detects end of processing instruction
1150 /// by the first `?>` sequence, as required by the specification, and therefore
1151 /// this sequence cannot appear inside it.
1152 ///
1153 /// # Example
1154 ///
1155 /// ```
1156 /// # use pretty_assertions::assert_eq;
1157 /// use std::borrow::Cow;
1158 /// use quick_xml::events::attributes::Attribute;
1159 /// use quick_xml::events::BytesPI;
1160 /// use quick_xml::name::QName;
1161 ///
1162 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1163 /// for attr in instruction.attributes() {
1164 /// assert_eq!(attr, Ok(Attribute {
1165 /// key: QName(b"href"),
1166 /// value: Cow::Borrowed(b"style.css"),
1167 /// }));
1168 /// }
1169 /// ```
1170 #[inline]
1171 pub fn attributes(&self) -> Attributes<'_> {
1172 self.content.attributes()
1173 }
1174}
1175
1176impl<'a> Debug for BytesPI<'a> {
1177 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1178 write!(f, "BytesPI {{ content: ")?;
1179 write_cow_string(f, &self.content.buf)?;
1180 write!(f, " }}")
1181 }
1182}
1183
1184impl<'a> Deref for BytesPI<'a> {
1185 type Target = [u8];
1186
1187 fn deref(&self) -> &[u8] {
1188 &self.content
1189 }
1190}
1191
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds the event from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&'a str as arbitrary::Arbitrary<'a>>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// Same hint as for the single `&str` consumed by `arbitrary`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1201
1202////////////////////////////////////////////////////////////////////////////////////////////////////
1203
1204/// An XML declaration (`Event::Decl`).
1205///
1206/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
1207///
1208/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1209/// returns the content of this event between `<?` and `?>`.
1210///
1211/// Note, that inner text will not contain `?>` sequence inside:
1212///
1213/// ```
1214/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
1215/// # use quick_xml::reader::Reader;
1216/// # use pretty_assertions::assert_eq;
1217/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
1218/// let content = "xml version = '1.0' ";
1219/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
1220///
1221/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
1222/// // deref coercion of &BytesDecl to &[u8]
1223/// assert_eq!(&event as &[u8], content.as_bytes());
1224/// // AsRef<[u8]> for &T + deref coercion
1225/// assert_eq!(event.as_ref(), content.as_bytes());
1226/// ```
1227#[derive(Clone, Debug, Eq, PartialEq)]
1228pub struct BytesDecl<'a> {
1229 content: BytesStart<'a>,
1230}
1231
1232impl<'a> BytesDecl<'a> {
1233 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1234 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1235 /// attribute.
1236 ///
1237 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1238 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1239 /// the double quote character is not allowed in any of the attribute values.
1240 pub fn new(
1241 version: &str,
1242 encoding: Option<&str>,
1243 standalone: Option<&str>,
1244 ) -> BytesDecl<'static> {
1245 // Compute length of the buffer based on supplied attributes
1246 // ' encoding=""' => 12
1247 let encoding_attr_len = if let Some(xs) = encoding {
1248 12 + xs.len()
1249 } else {
1250 0
1251 };
1252 // ' standalone=""' => 14
1253 let standalone_attr_len = if let Some(xs) = standalone {
1254 14 + xs.len()
1255 } else {
1256 0
1257 };
1258 // 'xml version=""' => 14
1259 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1260
1261 buf.push_str("xml version=\"");
1262 buf.push_str(version);
1263
1264 if let Some(encoding_val) = encoding {
1265 buf.push_str("\" encoding=\"");
1266 buf.push_str(encoding_val);
1267 }
1268
1269 if let Some(standalone_val) = standalone {
1270 buf.push_str("\" standalone=\"");
1271 buf.push_str(standalone_val);
1272 }
1273 buf.push('"');
1274
1275 BytesDecl {
1276 content: BytesStart::from_content(buf, 3),
1277 }
1278 }
1279
1280 /// Creates a `BytesDecl` from a `BytesStart`
1281 pub const fn from_start(start: BytesStart<'a>) -> Self {
1282 Self { content: start }
1283 }
1284
1285 /// Gets xml version, excluding quotes (`'` or `"`).
1286 ///
1287 /// According to the [grammar], the version *must* be the first thing in the declaration.
1288 /// This method tries to extract the first thing in the declaration and return it.
1289 /// In case of multiple attributes value of the first one is returned.
1290 ///
1291 /// If version is missed in the declaration, or the first thing is not a version,
1292 /// [`IllFormedError::MissingDeclVersion`] will be returned.
1293 ///
1294 /// # Examples
1295 ///
1296 /// ```
1297 /// use quick_xml::errors::{Error, IllFormedError};
1298 /// use quick_xml::events::{BytesDecl, BytesStart};
1299 ///
1300 /// // <?xml version='1.1'?>
1301 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1302 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1303 ///
1304 /// // <?xml version='1.0' version='1.1'?>
1305 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1306 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1307 ///
1308 /// // <?xml encoding='utf-8'?>
1309 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1310 /// match decl.version() {
1311 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1312 /// _ => assert!(false),
1313 /// }
1314 ///
1315 /// // <?xml encoding='utf-8' version='1.1'?>
1316 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1317 /// match decl.version() {
1318 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1319 /// _ => assert!(false),
1320 /// }
1321 ///
1322 /// // <?xml?>
1323 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1324 /// match decl.version() {
1325 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1326 /// _ => assert!(false),
1327 /// }
1328 /// ```
1329 ///
1330 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1331 pub fn version(&self) -> Result<Cow<'_, [u8]>, Error> {
1332 // The version *must* be the first thing in the declaration.
1333 match self.content.attributes().with_checks(false).next() {
1334 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1335 // first attribute was not "version"
1336 Some(Ok(a)) => {
1337 let found = from_utf8(a.key.as_ref())
1338 .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1339 .to_string();
1340 Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1341 found,
1342 ))))
1343 }
1344 // error parsing attributes
1345 Some(Err(e)) => Err(e.into()),
1346 // no attributes
1347 None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1348 }
1349 }
1350
1351 /// Gets xml encoding, excluding quotes (`'` or `"`).
1352 ///
1353 /// Although according to the [grammar] encoding must appear before `"standalone"`
1354 /// and after `"version"`, this method does not check that. The first occurrence
1355 /// of the attribute will be returned even if there are several. Also, method does
1356 /// not restrict symbols that can forming the encoding, so the returned encoding
1357 /// name may not correspond to the grammar.
1358 ///
1359 /// # Examples
1360 ///
1361 /// ```
1362 /// use std::borrow::Cow;
1363 /// use quick_xml::Error;
1364 /// use quick_xml::events::{BytesDecl, BytesStart};
1365 ///
1366 /// // <?xml version='1.1'?>
1367 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1368 /// assert!(decl.encoding().is_none());
1369 ///
1370 /// // <?xml encoding='utf-8'?>
1371 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1372 /// match decl.encoding() {
1373 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1374 /// _ => assert!(false),
1375 /// }
1376 ///
1377 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1378 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1379 /// match decl.encoding() {
1380 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1381 /// _ => assert!(false),
1382 /// }
1383 /// ```
1384 ///
1385 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1386 pub fn encoding(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1387 self.content
1388 .try_get_attribute("encoding")
1389 .map(|a| a.map(|a| a.value))
1390 .transpose()
1391 }
1392
1393 /// Gets xml standalone, excluding quotes (`'` or `"`).
1394 ///
1395 /// Although according to the [grammar] standalone flag must appear after `"version"`
1396 /// and `"encoding"`, this method does not check that. The first occurrence of the
1397 /// attribute will be returned even if there are several. Also, method does not
1398 /// restrict symbols that can forming the value, so the returned flag name may not
1399 /// correspond to the grammar.
1400 ///
1401 /// # Examples
1402 ///
1403 /// ```
1404 /// use std::borrow::Cow;
1405 /// use quick_xml::Error;
1406 /// use quick_xml::events::{BytesDecl, BytesStart};
1407 ///
1408 /// // <?xml version='1.1'?>
1409 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1410 /// assert!(decl.standalone().is_none());
1411 ///
1412 /// // <?xml standalone='yes'?>
1413 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1414 /// match decl.standalone() {
1415 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1416 /// _ => assert!(false),
1417 /// }
1418 ///
1419 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1420 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1421 /// match decl.standalone() {
1422 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1423 /// _ => assert!(false),
1424 /// }
1425 /// ```
1426 ///
1427 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1428 pub fn standalone(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1429 self.content
1430 .try_get_attribute("standalone")
1431 .map(|a| a.map(|a| a.value))
1432 .transpose()
1433 }
1434
1435 /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1436 /// algorithm.
1437 ///
1438 /// If encoding in not known, or `encoding` key was not found, returns `None`.
1439 /// In case of duplicated `encoding` key, encoding, corresponding to the first
1440 /// one, is returned.
1441 #[cfg(feature = "encoding")]
1442 pub fn encoder(&self) -> Option<&'static Encoding> {
1443 self.encoding()
1444 .and_then(|e| e.ok())
1445 .and_then(|e| Encoding::for_label(&e))
1446 }
1447
1448 /// Converts the event into an owned event.
1449 pub fn into_owned(self) -> BytesDecl<'static> {
1450 BytesDecl {
1451 content: self.content.into_owned(),
1452 }
1453 }
1454
1455 /// Converts the event into a borrowed event.
1456 #[inline]
1457 pub fn borrow(&self) -> BytesDecl<'_> {
1458 BytesDecl {
1459 content: self.content.borrow(),
1460 }
1461 }
1462}
1463
1464impl<'a> Deref for BytesDecl<'a> {
1465 type Target = [u8];
1466
1467 fn deref(&self) -> &[u8] {
1468 &self.content
1469 }
1470}
1471
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from three arbitrary values, consumed in this
    /// order: the version (`&str`), the optional encoding and the optional
    /// standalone flag (both `Option<&str>`).
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(
            <&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
        ))
    }

    /// Combined hint for everything `arbitrary` consumes. Reporting only the
    /// hint of the version string would understate the amount of data needed,
    /// because two `Option<&str>` values are read as well.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        arbitrary::size_hint::and_all(&[
            <&str as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
        ])
    }
}
1486
1487////////////////////////////////////////////////////////////////////////////////////////////////////
1488
1489/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
1490///
1491/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1492/// returns the content of this event between `&` and `;`:
1493///
1494/// ```
1495/// # use quick_xml::events::{BytesRef, Event};
1496/// # use quick_xml::reader::Reader;
1497/// # use pretty_assertions::assert_eq;
1498/// let mut reader = Reader::from_str(r#"&entity;"#);
1499/// let content = "entity";
1500/// let event = BytesRef::new(content);
1501///
1502/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
1503/// // deref coercion of &BytesRef to &[u8]
1504/// assert_eq!(&event as &[u8], content.as_bytes());
1505/// // AsRef<[u8]> for &T + deref coercion
1506/// assert_eq!(event.as_ref(), content.as_bytes());
1507/// ```
1508#[derive(Clone, Eq, PartialEq)]
1509pub struct BytesRef<'a> {
1510 content: Cow<'a, [u8]>,
1511 /// Encoding in which the `content` is stored inside the event.
1512 decoder: Decoder,
1513}
1514
1515impl<'a> BytesRef<'a> {
1516 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
1517 #[inline]
1518 pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
1519 Self {
1520 content: Cow::Borrowed(content),
1521 decoder,
1522 }
1523 }
1524
1525 /// Creates a new `BytesRef` borrowing a slice.
1526 ///
1527 /// # Warning
1528 ///
1529 /// `name` must be a valid name.
1530 #[inline]
1531 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
1532 Self {
1533 content: str_cow_to_bytes(name),
1534 decoder: Decoder::utf8(),
1535 }
1536 }
1537
1538 /// Converts the event into an owned event.
1539 pub fn into_owned(self) -> BytesRef<'static> {
1540 BytesRef {
1541 content: Cow::Owned(self.content.into_owned()),
1542 decoder: self.decoder,
1543 }
1544 }
1545
1546 /// Extracts the inner `Cow` from the `BytesRef` event container.
1547 #[inline]
1548 pub fn into_inner(self) -> Cow<'a, [u8]> {
1549 self.content
1550 }
1551
1552 /// Converts the event into a borrowed event.
1553 #[inline]
1554 pub fn borrow(&self) -> BytesRef<'_> {
1555 BytesRef {
1556 content: Cow::Borrowed(&self.content),
1557 decoder: self.decoder,
1558 }
1559 }
1560
1561 /// Decodes the content of the event.
1562 ///
1563 /// This will allocate if the value is encoded in non-UTF-8 encoding.
1564 ///
1565 /// This method does not normalizes end-of-line characters as required by [specification].
1566 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
1567 ///
1568 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
1569 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
1570 self.decoder.decode_cow(&self.content)
1571 }
1572
1573 /// Decodes the content of the XML 1.0 or HTML event.
1574 ///
1575 /// When this event produced by the reader, it uses the encoding information
1576 /// associated with that reader to interpret the raw bytes contained within
1577 /// this general reference event.
1578 ///
1579 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1580 /// is required.
1581 ///
1582 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
1583 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1584 ///
1585 /// This method also can be used to get HTML content, because rules the same.
1586 ///
1587 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1588 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1589 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1590 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1591 self.decoder.content(&self.content, normalize_xml10_eols)
1592 }
1593
1594 /// Decodes the content of the XML 1.1 event.
1595 ///
1596 /// When this event produced by the reader, it uses the encoding information
1597 /// associated with that reader to interpret the raw bytes contained within
1598 /// this general reference event.
1599 ///
1600 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1601 /// is required.
1602 ///
1603 /// Note, that this method should be used only if event represents XML 1.1 content,
1604 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1605 ///
1606 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
1607 ///
1608 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1609 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1610 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1611 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1612 self.decoder.content(&self.content, normalize_xml11_eols)
1613 }
1614
1615 /// Alias for [`xml11_content()`](Self::xml11_content).
1616 #[inline]
1617 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1618 self.xml11_content()
1619 }
1620
1621 /// Alias for [`xml10_content()`](Self::xml10_content).
1622 #[inline]
1623 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1624 self.xml10_content()
1625 }
1626
1627 /// Returns `true` if the specified reference represents the character reference
1628 /// (`&#<number>;`).
1629 ///
1630 /// ```
1631 /// # use quick_xml::events::BytesRef;
1632 /// # use pretty_assertions::assert_eq;
1633 /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1634 /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1635 /// assert_eq!(BytesRef::new("lt" ).is_char_ref(), false);
1636 /// ```
1637 pub fn is_char_ref(&self) -> bool {
1638 matches!(self.content.first(), Some(b'#'))
1639 }
1640
1641 /// If this reference represents character reference, then resolves it and
1642 /// returns the character, otherwise returns `None`.
1643 ///
1644 /// This method does not check if character is allowed for XML, in other words,
1645 /// well-formedness constraint [WFC: Legal Char] is not enforced.
1646 /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1647 ///
1648 /// ```
1649 /// # use quick_xml::events::BytesRef;
1650 /// # use pretty_assertions::assert_eq;
1651 /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1652 /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1653 /// assert_eq!(BytesRef::new("lt" ).resolve_char_ref().unwrap(), None);
1654 /// ```
1655 ///
1656 /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1657 pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1658 if let Some(num) = self.decode()?.strip_prefix('#') {
1659 let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1660 return Ok(Some(ch));
1661 }
1662 Ok(None)
1663 }
1664}
1665
1666impl<'a> Debug for BytesRef<'a> {
1667 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1668 write!(f, "BytesRef {{ content: ")?;
1669 write_cow_string(f, &self.content)?;
1670 write!(f, " }}")
1671 }
1672}
1673
1674impl<'a> Deref for BytesRef<'a> {
1675 type Target = [u8];
1676
1677 fn deref(&self) -> &[u8] {
1678 &self.content
1679 }
1680}
1681
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Builds the event from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&'a str as arbitrary::Arbitrary<'a>>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// Same hint as for the single `&str` consumed by `arbitrary`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1692
1693////////////////////////////////////////////////////////////////////////////////////////////////////
1694
1695/// Event emitted by [`Reader::read_event_into`].
1696///
1697/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
1698#[derive(Clone, Debug, Eq, PartialEq)]
1699#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
1700pub enum Event<'a> {
1701 /// Start tag (with attributes) `<tag attr="value">`.
1702 Start(BytesStart<'a>),
1703 /// End tag `</tag>`.
1704 End(BytesEnd<'a>),
1705 /// Empty element tag (with attributes) `<tag attr="value" />`.
1706 Empty(BytesStart<'a>),
1707 /// Escaped character data between tags.
1708 Text(BytesText<'a>),
1709 /// Unescaped character data stored in `<![CDATA[...]]>`.
1710 CData(BytesCData<'a>),
1711 /// Comment `<!-- ... -->`.
1712 Comment(BytesText<'a>),
1713 /// XML declaration `<?xml ...?>`.
1714 Decl(BytesDecl<'a>),
1715 /// Processing instruction `<?...?>`.
1716 PI(BytesPI<'a>),
1717 /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
1718 DocType(BytesText<'a>),
1719 /// General reference `&entity;` in the textual data. Can be either an entity
1720 /// reference, or a character reference.
1721 GeneralRef(BytesRef<'a>),
1722 /// End of XML document.
1723 Eof,
1724}
1725
1726impl<'a> Event<'a> {
1727 /// Converts the event to an owned version, untied to the lifetime of
1728 /// buffer used when reading but incurring a new, separate allocation.
1729 pub fn into_owned(self) -> Event<'static> {
1730 match self {
1731 Event::Start(e) => Event::Start(e.into_owned()),
1732 Event::End(e) => Event::End(e.into_owned()),
1733 Event::Empty(e) => Event::Empty(e.into_owned()),
1734 Event::Text(e) => Event::Text(e.into_owned()),
1735 Event::Comment(e) => Event::Comment(e.into_owned()),
1736 Event::CData(e) => Event::CData(e.into_owned()),
1737 Event::Decl(e) => Event::Decl(e.into_owned()),
1738 Event::PI(e) => Event::PI(e.into_owned()),
1739 Event::DocType(e) => Event::DocType(e.into_owned()),
1740 Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1741 Event::Eof => Event::Eof,
1742 }
1743 }
1744
1745 /// Converts the event into a borrowed event.
1746 #[inline]
1747 pub fn borrow(&self) -> Event<'_> {
1748 match self {
1749 Event::Start(e) => Event::Start(e.borrow()),
1750 Event::End(e) => Event::End(e.borrow()),
1751 Event::Empty(e) => Event::Empty(e.borrow()),
1752 Event::Text(e) => Event::Text(e.borrow()),
1753 Event::Comment(e) => Event::Comment(e.borrow()),
1754 Event::CData(e) => Event::CData(e.borrow()),
1755 Event::Decl(e) => Event::Decl(e.borrow()),
1756 Event::PI(e) => Event::PI(e.borrow()),
1757 Event::DocType(e) => Event::DocType(e.borrow()),
1758 Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1759 Event::Eof => Event::Eof,
1760 }
1761 }
1762}
1763
1764impl<'a> Deref for Event<'a> {
1765 type Target = [u8];
1766
1767 fn deref(&self) -> &[u8] {
1768 match *self {
1769 Event::Start(ref e) | Event::Empty(ref e) => e,
1770 Event::End(ref e) => e,
1771 Event::Text(ref e) => e,
1772 Event::Decl(ref e) => e,
1773 Event::PI(ref e) => e,
1774 Event::CData(ref e) => e,
1775 Event::Comment(ref e) => e,
1776 Event::DocType(ref e) => e,
1777 Event::GeneralRef(ref e) => e,
1778 Event::Eof => &[],
1779 }
1780 }
1781}
1782
1783impl<'a> AsRef<Event<'a>> for Event<'a> {
1784 fn as_ref(&self) -> &Event<'a> {
1785 self
1786 }
1787}
1788
1789////////////////////////////////////////////////////////////////////////////////////////////////////
1790
/// Reinterprets a string `Cow` as a byte `Cow` without copying the data:
/// a borrowed string stays borrowed, an owned string hands its buffer over.
#[inline]
fn str_cow_to_bytes<'a, C>(content: C) -> Cow<'a, [u8]>
where
    C: Into<Cow<'a, str>>,
{
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(string) => Cow::Owned(Vec::from(string)),
        Cow::Borrowed(slice) => Cow::Borrowed(slice.as_bytes()),
    }
}
1798
/// Applies `trim` to the bytes held by `value`, preserving the kind of the `Cow`.
///
/// A borrowed value is narrowed in place. An owned value keeps its buffer when
/// `trim` leaves the length unchanged; otherwise the trimmed part is copied
/// into a new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let owned = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(vec) => vec,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        // Nothing was cut off, the original buffer can be reused.
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1814
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let tag = BytesStart::new("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
    }

    /// Renaming a tag changes the name and the total length, with one
    /// attribute present.
    #[test]
    fn bytestart_set_name() {
        let mut tag = BytesStart::new("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
        assert_eq!(tag.attributes_raw(), b"");

        tag.push_attribute(("x", "a"));
        assert_eq!(tag.len(), 10);
        assert_eq!(tag.attributes_raw(), b" x=\"a\"");

        tag.set_name(b"g");
        assert_eq!(tag.len(), 7);
        assert_eq!(tag.name(), QName(b"g"));
    }

    /// Clearing the attributes leaves only the name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut tag = BytesStart::new("test");
        for _ in 0..2 {
            tag.push_attribute(("x", "y\"z"));
        }
        tag.clear_attributes();

        assert!(tag.attributes().next().is_none());
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
    }
}
1851}