fast_xml/events/mod.rs
1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
//! Events occur both during reading and writing and are
//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by
//! [`Reader::read_event`]. You must listen
//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event
33//! [`Reader`]: ../reader/struct.Reader.html
34//! [`Writer`]: ../writer/struct.Writer.html
35//! [`Event`]: enum.Event.html
36
37pub mod attributes;
38
39#[cfg(feature = "encoding_rs")]
40use encoding_rs::Encoding;
41use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Deref, str::from_utf8};
42
43use crate::escape::{do_unescape, escape, partial_escape};
44use crate::utils::write_cow_string;
45use crate::{errors::Error, errors::Result, reader::Reader};
46use attributes::{Attribute, Attributes};
47
48#[cfg(feature = "serialize")]
49use crate::escape::EscapeError;
50
51use memchr;
52
/// Opening tag data (`Event::Start`), with optional attributes.
///
/// `<name attr="value">`.
///
/// The tag name is available through the [`name`] and [`local_name`] methods,
/// [`unescaped`] resolves XML escape sequences in the stored content, and the
/// [`attributes`] method returns an iterator over the attributes.
///
/// [`name`]: #method.name
/// [`local_name`]: #method.local_name
/// [`unescaped`]: #method.unescaped
/// [`attributes`]: #method.attributes
#[derive(Clone, PartialEq, Eq)]
pub struct BytesStart<'a> {
    /// Raw content of the element (name followed by the attributes),
    /// before any UTF-8 conversion.
    buf: Cow<'a, [u8]>,
    /// Length of the element name; the name occupies `buf[..name_len]`.
    name_len: usize,
}
71
72impl<'a> BytesStart<'a> {
73 /// Creates a new `BytesStart` from the given content (name + attributes).
74 ///
75 /// # Warning
76 ///
77 /// `&content[..name_len]` is not checked to be a valid name
78 #[inline]
79 pub fn borrowed(content: &'a [u8], name_len: usize) -> Self {
80 BytesStart {
81 buf: Cow::Borrowed(content),
82 name_len,
83 }
84 }
85
86 /// Creates a new `BytesStart` from the given name.
87 ///
88 /// # Warning
89 ///
90 /// `&content` is not checked to be a valid name
91 #[inline]
92 pub fn borrowed_name(name: &'a [u8]) -> BytesStart<'a> {
93 Self::borrowed(name, name.len())
94 }
95
96 /// Creates a new `BytesStart` from the given content (name + attributes)
97 ///
98 /// Owns its contents.
99 #[inline]
100 pub fn owned<C: Into<Vec<u8>>>(content: C, name_len: usize) -> BytesStart<'static> {
101 BytesStart {
102 buf: Cow::Owned(content.into()),
103 name_len,
104 }
105 }
106
107 /// Creates a new `BytesStart` from the given name
108 ///
109 /// Owns its contents.
110 #[inline]
111 pub fn owned_name<C: Into<Vec<u8>>>(name: C) -> BytesStart<'static> {
112 let content = name.into();
113 BytesStart {
114 name_len: content.len(),
115 buf: Cow::Owned(content),
116 }
117 }
118
119 /// Converts the event into an owned event.
120 pub fn into_owned(self) -> BytesStart<'static> {
121 Self::owned(self.buf.into_owned(), self.name_len)
122 }
123
124 /// Converts the event into an owned event without taking ownership of Event
125 pub fn to_owned(&self) -> BytesStart<'static> {
126 Self::owned(self.buf.to_owned(), self.name_len)
127 }
128
129 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
130 ///
131 /// # Example
132 ///
133 /// ```rust
134 /// # use fast_xml::{Error, Writer};
135 /// use fast_xml::events::{BytesStart, Event};
136 ///
137 /// struct SomeStruct<'a> {
138 /// attrs: BytesStart<'a>,
139 /// // ...
140 /// }
141 /// # impl<'a> SomeStruct<'a> {
142 /// # fn example(&self) -> Result<(), Error> {
143 /// # let mut writer = Writer::new(Vec::new());
144 ///
145 /// writer.write_event(Event::Start(self.attrs.to_borrowed()))?;
146 /// // ...
147 /// writer.write_event(Event::End(self.attrs.to_end()))?;
148 /// # Ok(())
149 /// # }}
150 /// ```
151 ///
152 /// [`to_end`]: #method.to_end
153 pub fn to_borrowed(&self) -> BytesStart {
154 BytesStart::borrowed(&self.buf, self.name_len)
155 }
156
157 /// Creates new paired close tag
158 pub fn to_end(&self) -> BytesEnd {
159 BytesEnd::borrowed(self.name())
160 }
161
162 /// Gets the undecoded raw tag name as a `&[u8]`.
163 #[inline]
164 pub fn name(&self) -> &[u8] {
165 &self.buf[..self.name_len]
166 }
167
168 /// Gets the undecoded raw local tag name (excluding namespace) as a `&[u8]`.
169 ///
170 /// All content up to and including the first `:` character is removed from the tag name.
171 #[inline]
172 pub fn local_name(&self) -> &[u8] {
173 let name = self.name();
174 memchr::memchr(b':', name).map_or(name, |i| &name[i + 1..])
175 }
176
177 /// Gets the unescaped tag name.
178 ///
179 /// XML escape sequences like "`<`" will be replaced by their unescaped characters like
180 /// "`<`".
181 ///
182 /// See also [`unescaped_with_custom_entities()`](#method.unescaped_with_custom_entities)
183 #[inline]
184 pub fn unescaped(&self) -> Result<Cow<[u8]>> {
185 self.make_unescaped(None)
186 }
187
188 /// Gets the unescaped tag name, using custom entities.
189 ///
190 /// XML escape sequences like "`<`" will be replaced by their unescaped characters like
191 /// "`<`".
192 /// Additional entities can be provided in `custom_entities`.
193 ///
194 /// # Pre-condition
195 ///
196 /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
197 ///
198 /// See also [`unescaped()`](#method.unescaped)
199 #[inline]
200 pub fn unescaped_with_custom_entities<'s>(
201 &'s self,
202 custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
203 ) -> Result<Cow<'s, [u8]>> {
204 self.make_unescaped(Some(custom_entities))
205 }
206
207 #[inline]
208 fn make_unescaped<'s>(
209 &'s self,
210 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
211 ) -> Result<Cow<'s, [u8]>> {
212 do_unescape(&*self.buf, custom_entities).map_err(Error::EscapeError)
213 }
214
215 /// Returns the unescaped and decoded string value.
216 ///
217 /// This allocates a `String` in all cases. For performance reasons it might be a better idea to
218 /// instead use one of:
219 ///
220 /// * [`unescaped()`], as it doesn't allocate when no escape sequences are used.
221 /// * [`Reader::decode()`], as it only allocates when the decoding can't be performed otherwise.
222 ///
223 /// [`unescaped()`]: #method.unescaped
224 /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode
225 #[inline]
226 pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
227 self.do_unescape_and_decode_with_custom_entities(reader, None)
228 }
229
230 /// Returns the unescaped and decoded string value with custom entities.
231 ///
232 /// This allocates a `String` in all cases. For performance reasons it might be a better idea to
233 /// instead use one of:
234 ///
235 /// * [`unescaped_with_custom_entities()`], as it doesn't allocate when no escape sequences are used.
236 /// * [`Reader::decode()`], as it only allocates when the decoding can't be performed otherwise.
237 ///
238 /// [`unescaped_with_custom_entities()`]: #method.unescaped_with_custom_entities
239 /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode
240 ///
241 /// # Pre-condition
242 ///
243 /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
244 #[inline]
245 pub fn unescape_and_decode_with_custom_entities<B: BufRead>(
246 &self,
247 reader: &Reader<B>,
248 custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
249 ) -> Result<String> {
250 self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities))
251 }
252
253 #[cfg(feature = "encoding")]
254 #[inline]
255 fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
256 &self,
257 reader: &Reader<B>,
258 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
259 ) -> Result<String> {
260 let decoded = reader.decode(&*self);
261 let unescaped =
262 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
263 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
264 }
265
266 #[cfg(not(feature = "encoding"))]
267 #[inline]
268 fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
269 &self,
270 reader: &Reader<B>,
271 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
272 ) -> Result<String> {
273 let decoded = reader.decode(&*self)?;
274 let unescaped =
275 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
276 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
277 }
278
279 /// Edit the name of the BytesStart in-place
280 ///
281 /// # Warning
282 ///
283 /// `name` is not checked to be a valid name
284 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
285 let bytes = self.buf.to_mut();
286 bytes.splice(..self.name_len, name.iter().cloned());
287 self.name_len = name.len();
288 self
289 }
290}
291
292/// Attribute-related methods
293impl<'a> BytesStart<'a> {
294 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
295 ///
296 /// The yielded items must be convertible to [`Attribute`] using `Into`.
297 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
298 where
299 I: IntoIterator,
300 I::Item: Into<Attribute<'b>>,
301 {
302 self.extend_attributes(attributes);
303 self
304 }
305
306 /// Add additional attributes to this tag using an iterator.
307 ///
308 /// The yielded items must be convertible to [`Attribute`] using `Into`.
309 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
310 where
311 I: IntoIterator,
312 I::Item: Into<Attribute<'b>>,
313 {
314 for attr in attributes {
315 self.push_attribute(attr);
316 }
317 self
318 }
319
320 /// Adds an attribute to this element.
321 pub fn push_attribute<'b, A>(&mut self, attr: A)
322 where
323 A: Into<Attribute<'b>>,
324 {
325 let a = attr.into();
326 let bytes = self.buf.to_mut();
327 bytes.push(b' ');
328 bytes.extend_from_slice(a.key);
329 bytes.extend_from_slice(b"=\"");
330 bytes.extend_from_slice(&*a.value);
331 bytes.push(b'"');
332 }
333
334 /// Remove all attributes from the ByteStart
335 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
336 self.buf.to_mut().truncate(self.name_len);
337 self
338 }
339
340 /// Returns an iterator over the attributes of this tag.
341 pub fn attributes(&self) -> Attributes {
342 Attributes::new(&self.buf, self.name_len)
343 }
344
345 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
346 pub fn html_attributes(&self) -> Attributes {
347 Attributes::html(self, self.name_len)
348 }
349
350 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
351 /// including the whitespace after the tag name if there is any.
352 #[inline]
353 pub fn attributes_raw(&self) -> &[u8] {
354 &self.buf[self.name_len..]
355 }
356
357 /// Try to get an attribute
358 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
359 &'a self,
360 attr_name: N,
361 ) -> Result<Option<Attribute<'a>>> {
362 for a in self.attributes() {
363 let a = a?;
364 if a.key == attr_name.as_ref() {
365 return Ok(Some(a));
366 }
367 }
368 Ok(None)
369 }
370}
371
372impl<'a> std::fmt::Debug for BytesStart<'a> {
373 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
374 write!(f, "BytesStart {{ buf: ")?;
375 write_cow_string(f, &self.buf)?;
376 write!(f, ", name_len: {} }}", self.name_len)
377 }
378}
379
380////////////////////////////////////////////////////////////////////////////////////////////////////
381
382/// An XML declaration (`Event::Decl`).
383///
384/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
385#[derive(Clone, Debug, Eq, PartialEq)]
386pub struct BytesDecl<'a> {
387 element: BytesStart<'a>,
388}
389
390impl<'a> BytesDecl<'a> {
391 /// Creates a `BytesDecl` from a `BytesStart`
392 pub fn from_start(start: BytesStart<'a>) -> BytesDecl<'a> {
393 BytesDecl { element: start }
394 }
395
396 /// Gets xml version, excluding quotes (`'` or `"`).
397 ///
398 /// According to the [grammar], the version *must* be the first thing in the declaration.
399 /// This method tries to extract the first thing in the declaration and return it.
400 /// In case of multiple attributes value of the first one is returned.
401 ///
402 /// If version is missed in the declaration, or the first thing is not a version,
403 /// [`Error::XmlDeclWithoutVersion`] will be returned.
404 ///
405 /// # Examples
406 ///
407 /// ```
408 /// use std::borrow::Cow;
409 /// use fast_xml::Error;
410 /// use fast_xml::events::{BytesDecl, BytesStart};
411 ///
412 /// // <?xml version='1.1'?>
413 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" version='1.1'", 0));
414 /// assert_eq!(
415 /// decl.version().unwrap(),
416 /// Cow::Borrowed(b"1.1".as_ref())
417 /// );
418 ///
419 /// // <?xml version='1.0' version='1.1'?>
420 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" version='1.0' version='1.1'", 0));
421 /// assert_eq!(
422 /// decl.version().unwrap(),
423 /// Cow::Borrowed(b"1.0".as_ref())
424 /// );
425 ///
426 /// // <?xml encoding='utf-8'?>
427 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" encoding='utf-8'", 0));
428 /// match decl.version() {
429 /// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding".to_string()),
430 /// _ => assert!(false),
431 /// }
432 ///
433 /// // <?xml encoding='utf-8' version='1.1'?>
434 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" encoding='utf-8' version='1.1'", 0));
435 /// match decl.version() {
436 /// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding".to_string()),
437 /// _ => assert!(false),
438 /// }
439 ///
440 /// // <?xml?>
441 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b"", 0));
442 /// match decl.version() {
443 /// Err(Error::XmlDeclWithoutVersion(None)) => {},
444 /// _ => assert!(false),
445 /// }
446 /// ```
447 ///
448 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
449 pub fn version(&self) -> Result<Cow<[u8]>> {
450 // The version *must* be the first thing in the declaration.
451 match self.element.attributes().with_checks(false).next() {
452 Some(Ok(a)) if a.key == b"version" => Ok(a.value),
453 // first attribute was not "version"
454 Some(Ok(a)) => {
455 let found = from_utf8(a.key).map_err(Error::Utf8)?.to_string();
456 Err(Error::XmlDeclWithoutVersion(Some(found)))
457 }
458 // error parsing attributes
459 Some(Err(e)) => Err(e.into()),
460 // no attributes
461 None => Err(Error::XmlDeclWithoutVersion(None)),
462 }
463 }
464
465 /// Gets xml encoding, excluding quotes (`'` or `"`).
466 ///
467 /// Although according to the [grammar] encoding must appear before `"standalone"`
468 /// and after `"version"`, this method does not check that. The first occurrence
469 /// of the attribute will be returned even if there are several. Also, method does
470 /// not restrict symbols that can forming the encoding, so the returned encoding
471 /// name may not correspond to the grammar.
472 ///
473 /// # Examples
474 ///
475 /// ```
476 /// use std::borrow::Cow;
477 /// use fast_xml::Error;
478 /// use fast_xml::events::{BytesDecl, BytesStart};
479 ///
480 /// // <?xml version='1.1'?>
481 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" version='1.1'", 0));
482 /// assert!(decl.encoding().is_none());
483 ///
484 /// // <?xml encoding='utf-8'?>
485 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" encoding='utf-8'", 0));
486 /// match decl.encoding() {
487 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
488 /// _ => assert!(false),
489 /// }
490 ///
491 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
492 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" encoding='something_WRONG' encoding='utf-8'", 0));
493 /// match decl.encoding() {
494 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
495 /// _ => assert!(false),
496 /// }
497 /// ```
498 ///
499 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
500 pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
501 self.element
502 .try_get_attribute("encoding")
503 .map(|a| a.map(|a| a.value))
504 .transpose()
505 }
506
507 /// Gets xml standalone, excluding quotes (`'` or `"`).
508 ///
509 /// Although according to the [grammar] standalone flag must appear after `"version"`
510 /// and `"encoding"`, this method does not check that. The first occurrence of the
511 /// attribute will be returned even if there are several. Also, method does not
512 /// restrict symbols that can forming the value, so the returned flag name may not
513 /// correspond to the grammar.
514 ///
515 /// # Examples
516 ///
517 /// ```
518 /// use std::borrow::Cow;
519 /// use fast_xml::Error;
520 /// use fast_xml::events::{BytesDecl, BytesStart};
521 ///
522 /// // <?xml version='1.1'?>
523 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" version='1.1'", 0));
524 /// assert!(decl.standalone().is_none());
525 ///
526 /// // <?xml standalone='yes'?>
527 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" standalone='yes'", 0));
528 /// match decl.standalone() {
529 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
530 /// _ => assert!(false),
531 /// }
532 ///
533 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
534 /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" standalone='something_WRONG' encoding='utf-8'", 0));
535 /// match decl.standalone() {
536 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
537 /// _ => assert!(false),
538 /// }
539 /// ```
540 ///
541 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
542 pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
543 self.element
544 .try_get_attribute("standalone")
545 .map(|a| a.map(|a| a.value))
546 .transpose()
547 }
548
549 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
550 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
551 /// attribute.
552 ///
553 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
554 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
555 /// the double quote character is not allowed in any of the attribute values.
556 pub fn new(
557 version: &[u8],
558 encoding: Option<&[u8]>,
559 standalone: Option<&[u8]>,
560 ) -> BytesDecl<'static> {
561 // Compute length of the buffer based on supplied attributes
562 // ' encoding=""' => 12
563 let encoding_attr_len = if let Some(xs) = encoding {
564 12 + xs.len()
565 } else {
566 0
567 };
568 // ' standalone=""' => 14
569 let standalone_attr_len = if let Some(xs) = standalone {
570 14 + xs.len()
571 } else {
572 0
573 };
574 // 'xml version=""' => 14
575 let mut buf = Vec::with_capacity(14 + encoding_attr_len + standalone_attr_len);
576
577 buf.extend_from_slice(b"xml version=\"");
578 buf.extend_from_slice(version);
579
580 if let Some(encoding_val) = encoding {
581 buf.extend_from_slice(b"\" encoding=\"");
582 buf.extend_from_slice(encoding_val);
583 }
584
585 if let Some(standalone_val) = standalone {
586 buf.extend_from_slice(b"\" standalone=\"");
587 buf.extend_from_slice(standalone_val);
588 }
589 buf.push(b'"');
590
591 BytesDecl {
592 element: BytesStart::owned(buf, 3),
593 }
594 }
595
596 /// Gets the decoder struct
597 #[cfg(feature = "encoding_rs")]
598 pub fn encoder(&self) -> Option<&'static Encoding> {
599 self.encoding()
600 .and_then(|e| e.ok())
601 .and_then(|e| Encoding::for_label(&*e))
602 }
603
604 /// Converts the event into an owned event.
605 pub fn into_owned(self) -> BytesDecl<'static> {
606 BytesDecl {
607 element: self.element.into_owned(),
608 }
609 }
610}
611
612////////////////////////////////////////////////////////////////////////////////////////////////////
613
/// Closing tag data (`Event::End`): `</name>`.
#[derive(Clone, PartialEq, Eq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded name of the closed element.
    name: Cow<'a, [u8]>,
}
619
620impl<'a> BytesEnd<'a> {
621 /// Creates a new `BytesEnd` borrowing a slice
622 #[inline]
623 pub fn borrowed(name: &'a [u8]) -> BytesEnd<'a> {
624 BytesEnd {
625 name: Cow::Borrowed(name),
626 }
627 }
628
629 /// Creates a new `BytesEnd` owning its name
630 #[inline]
631 pub fn owned(name: Vec<u8>) -> BytesEnd<'static> {
632 BytesEnd {
633 name: Cow::Owned(name),
634 }
635 }
636
637 /// Converts the event into an owned event.
638 pub fn into_owned(self) -> BytesEnd<'static> {
639 BytesEnd {
640 name: Cow::Owned(self.name.into_owned()),
641 }
642 }
643
644 /// Gets `BytesEnd` event name
645 #[inline]
646 pub fn name(&self) -> &[u8] {
647 &*self.name
648 }
649
650 /// local name (excluding namespace) as &[u8] (without eventual attributes)
651 /// returns the name() with any leading namespace removed (all content up to
652 /// and including the first ':' character)
653 #[inline]
654 pub fn local_name(&self) -> &[u8] {
655 if let Some(i) = self.name().iter().position(|b| *b == b':') {
656 &self.name()[i + 1..]
657 } else {
658 self.name()
659 }
660 }
661}
662
663impl<'a> std::fmt::Debug for BytesEnd<'a> {
664 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
665 write!(f, "BytesEnd {{ name: ")?;
666 write_cow_string(f, &self.name)?;
667 write!(f, " }}")
668 }
669}
670
671////////////////////////////////////////////////////////////////////////////////////////////////////
672
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally the data is kept in escaped form as well.
#[derive(Clone, PartialEq, Eq)]
pub struct BytesText<'a> {
    // Invariant: The content is always escaped.
    content: Cow<'a, [u8]>,
}
680
681impl<'a> BytesText<'a> {
682 /// Creates a new `BytesText` from an escaped byte sequence.
683 #[inline]
684 pub fn from_escaped<C: Into<Cow<'a, [u8]>>>(content: C) -> Self {
685 Self {
686 content: content.into(),
687 }
688 }
689
690 /// Creates a new `BytesText` from a byte sequence. The byte sequence is
691 /// expected not to be escaped.
692 #[inline]
693 pub fn from_plain(content: &'a [u8]) -> Self {
694 Self {
695 content: escape(content),
696 }
697 }
698
699 /// Creates a new `BytesText` from an escaped string.
700 #[inline]
701 pub fn from_escaped_str<C: Into<Cow<'a, str>>>(content: C) -> Self {
702 Self::from_escaped(match content.into() {
703 Cow::Owned(o) => Cow::Owned(o.into_bytes()),
704 Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
705 })
706 }
707
708 /// Creates a new `BytesText` from a string. The string is expected not to
709 /// be escaped.
710 #[inline]
711 pub fn from_plain_str(content: &'a str) -> Self {
712 Self::from_plain(content.as_bytes())
713 }
714
715 /// Ensures that all data is owned to extend the object's lifetime if
716 /// necessary.
717 #[inline]
718 pub fn into_owned(self) -> BytesText<'static> {
719 BytesText {
720 content: self.content.into_owned().into(),
721 }
722 }
723
724 /// Extracts the inner `Cow` from the `BytesText` event container.
725 #[inline]
726 pub fn into_inner(self) -> Cow<'a, [u8]> {
727 self.content
728 }
729
730 /// Returns unescaped version of the text content, that can be written
731 /// as CDATA in XML
732 #[cfg(feature = "serialize")]
733 pub(crate) fn unescape(self) -> std::result::Result<BytesCData<'a>, EscapeError> {
734 //TODO: need to think about better API instead of dozens similar functions
735 // Maybe use builder pattern. After that expose function as public API
736 //FIXME: need to take into account entities defined in the document
737 Ok(BytesCData::new(match do_unescape(&self.content, None)? {
738 Cow::Borrowed(_) => self.content,
739 Cow::Owned(unescaped) => Cow::Owned(unescaped),
740 }))
741 }
742
743 /// gets escaped content
744 ///
745 /// Searches for '&' into content and try to escape the coded character if possible
746 /// returns Malformed error with index within element if '&' is not followed by ';'
747 ///
748 /// See also [`unescaped_with_custom_entities()`](#method.unescaped_with_custom_entities)
749 pub fn unescaped(&self) -> Result<Cow<[u8]>> {
750 self.make_unescaped(None)
751 }
752
753 /// gets escaped content with custom entities
754 ///
755 /// Searches for '&' into content and try to escape the coded character if possible
756 /// returns Malformed error with index within element if '&' is not followed by ';'
757 /// Additional entities can be provided in `custom_entities`.
758 ///
759 /// # Pre-condition
760 ///
761 /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
762 ///
763 /// See also [`unescaped()`](#method.unescaped)
764 pub fn unescaped_with_custom_entities<'s>(
765 &'s self,
766 custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
767 ) -> Result<Cow<'s, [u8]>> {
768 self.make_unescaped(Some(custom_entities))
769 }
770
771 fn make_unescaped<'s>(
772 &'s self,
773 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
774 ) -> Result<Cow<'s, [u8]>> {
775 do_unescape(self, custom_entities).map_err(Error::EscapeError)
776 }
777
778 /// helper method to unescape then decode self using the reader encoding
779 /// but without BOM (Byte order mark)
780 ///
781 /// for performance reasons (could avoid allocating a `String`),
782 /// it might be wiser to manually use
783 /// 1. BytesText::unescaped()
784 /// 2. Reader::decode(...)
785 #[cfg(feature = "encoding")]
786 pub fn unescape_and_decode_without_bom<B: BufRead>(
787 &self,
788 reader: &mut Reader<B>,
789 ) -> Result<String> {
790 self.do_unescape_and_decode_without_bom(reader, None)
791 }
792
793 /// helper method to unescape then decode self using the reader encoding
794 /// but without BOM (Byte order mark)
795 ///
796 /// for performance reasons (could avoid allocating a `String`),
797 /// it might be wiser to manually use
798 /// 1. BytesText::unescaped()
799 /// 2. Reader::decode(...)
800 #[cfg(not(feature = "encoding"))]
801 pub fn unescape_and_decode_without_bom<B: BufRead>(
802 &self,
803 reader: &Reader<B>,
804 ) -> Result<String> {
805 self.do_unescape_and_decode_without_bom(reader, None)
806 }
807
808 /// helper method to unescape then decode self using the reader encoding with custom entities
809 /// but without BOM (Byte order mark)
810 ///
811 /// for performance reasons (could avoid allocating a `String`),
812 /// it might be wiser to manually use
813 /// 1. BytesText::unescaped()
814 /// 2. Reader::decode(...)
815 ///
816 /// # Pre-condition
817 ///
818 /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
819 #[cfg(feature = "encoding")]
820 pub fn unescape_and_decode_without_bom_with_custom_entities<B: BufRead>(
821 &self,
822 reader: &mut Reader<B>,
823 custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
824 ) -> Result<String> {
825 self.do_unescape_and_decode_without_bom(reader, Some(custom_entities))
826 }
827
828 /// helper method to unescape then decode self using the reader encoding with custom entities
829 /// but without BOM (Byte order mark)
830 ///
831 /// for performance reasons (could avoid allocating a `String`),
832 /// it might be wiser to manually use
833 /// 1. BytesText::unescaped()
834 /// 2. Reader::decode(...)
835 ///
836 /// # Pre-condition
837 ///
838 /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
839 #[cfg(not(feature = "encoding"))]
840 pub fn unescape_and_decode_without_bom_with_custom_entities<B: BufRead>(
841 &self,
842 reader: &Reader<B>,
843 custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
844 ) -> Result<String> {
845 self.do_unescape_and_decode_without_bom(reader, Some(custom_entities))
846 }
847
848 #[cfg(feature = "encoding")]
849 fn do_unescape_and_decode_without_bom<B: BufRead>(
850 &self,
851 reader: &mut Reader<B>,
852 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
853 ) -> Result<String> {
854 let decoded = reader.decode_without_bom(&*self);
855 let unescaped =
856 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
857 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
858 }
859
860 #[cfg(not(feature = "encoding"))]
861 fn do_unescape_and_decode_without_bom<B: BufRead>(
862 &self,
863 reader: &Reader<B>,
864 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
865 ) -> Result<String> {
866 let decoded = reader.decode_without_bom(&*self)?;
867 let unescaped =
868 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
869 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
870 }
871
872 /// helper method to unescape then decode self using the reader encoding
873 ///
874 /// for performance reasons (could avoid allocating a `String`),
875 /// it might be wiser to manually use
876 /// 1. BytesText::unescaped()
877 /// 2. Reader::decode(...)
878 pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
879 self.do_unescape_and_decode_with_custom_entities(reader, None)
880 }
881
882 /// helper method to unescape then decode self using the reader encoding with custom entities
883 ///
884 /// for performance reasons (could avoid allocating a `String`),
885 /// it might be wiser to manually use
886 /// 1. BytesText::unescaped()
887 /// 2. Reader::decode(...)
888 ///
889 /// # Pre-condition
890 ///
891 /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
892 pub fn unescape_and_decode_with_custom_entities<B: BufRead>(
893 &self,
894 reader: &Reader<B>,
895 custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
896 ) -> Result<String> {
897 self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities))
898 }
899
900 #[cfg(feature = "encoding")]
901 fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
902 &self,
903 reader: &Reader<B>,
904 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
905 ) -> Result<String> {
906 let decoded = reader.decode(&*self);
907 let unescaped =
908 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
909 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
910 }
911
912 #[cfg(not(feature = "encoding"))]
913 fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
914 &self,
915 reader: &Reader<B>,
916 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
917 ) -> Result<String> {
918 let decoded = reader.decode(&*self)?;
919 let unescaped =
920 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
921 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
922 }
923
924 /// Gets escaped content.
925 pub fn escaped(&self) -> &[u8] {
926 self.content.as_ref()
927 }
928}
929
930impl<'a> std::fmt::Debug for BytesText<'a> {
931 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
932 write!(f, "BytesText {{ content: ")?;
933 write_cow_string(f, &self.content)?;
934 write!(f, " }}")
935 }
936}
937
938////////////////////////////////////////////////////////////////////////////////////////////////////
939
/// CDATA content contains unescaped data from the reader. If you want to write it as text,
/// [convert](Self::escape) it to [`BytesText`]
#[derive(Clone, PartialEq, Eq)]
pub struct BytesCData<'a> {
    /// Raw, unescaped content of the CDATA section.
    content: Cow<'a, [u8]>,
}
946
947impl<'a> BytesCData<'a> {
948 /// Creates a new `BytesCData` from a byte sequence.
949 #[inline]
950 pub fn new<C: Into<Cow<'a, [u8]>>>(content: C) -> Self {
951 Self {
952 content: content.into(),
953 }
954 }
955
956 /// Creates a new `BytesCData` from a string
957 #[inline]
958 pub fn from_str(content: &'a str) -> Self {
959 Self::new(content.as_bytes())
960 }
961
962 /// Ensures that all data is owned to extend the object's lifetime if
963 /// necessary.
964 #[inline]
965 pub fn into_owned(self) -> BytesCData<'static> {
966 BytesCData {
967 content: self.content.into_owned().into(),
968 }
969 }
970
971 /// Extracts the inner `Cow` from the `BytesCData` event container.
972 #[inline]
973 pub fn into_inner(self) -> Cow<'a, [u8]> {
974 self.content
975 }
976
977 /// Converts this CDATA content to an escaped version, that can be written
978 /// as an usual text in XML.
979 ///
980 /// This function performs following replacements:
981 ///
982 /// | Character | Replacement
983 /// |-----------|------------
984 /// | `<` | `<`
985 /// | `>` | `>`
986 /// | `&` | `&`
987 /// | `'` | `'`
988 /// | `"` | `"`
989 pub fn escape(self) -> BytesText<'a> {
990 BytesText::from_escaped(match escape(&self.content) {
991 Cow::Borrowed(_) => self.content,
992 Cow::Owned(escaped) => Cow::Owned(escaped),
993 })
994 }
995
996 /// Converts this CDATA content to an escaped version, that can be written
997 /// as an usual text in XML.
998 ///
999 /// In XML text content, it is allowed (though not recommended) to leave
1000 /// the quote special characters `"` and `'` unescaped.
1001 ///
1002 /// This function performs following replacements:
1003 ///
1004 /// | Character | Replacement
1005 /// |-----------|------------
1006 /// | `<` | `<`
1007 /// | `>` | `>`
1008 /// | `&` | `&`
1009 pub fn partial_escape(self) -> BytesText<'a> {
1010 BytesText::from_escaped(match partial_escape(&self.content) {
1011 Cow::Borrowed(_) => self.content,
1012 Cow::Owned(escaped) => Cow::Owned(escaped),
1013 })
1014 }
1015
1016 /// Gets content of this text buffer in the specified encoding
1017 #[cfg(feature = "serialize")]
1018 pub(crate) fn decode(&self, decoder: crate::reader::Decoder) -> Result<Cow<'a, str>> {
1019 Ok(match &self.content {
1020 Cow::Borrowed(bytes) => {
1021 #[cfg(feature = "encoding")]
1022 {
1023 decoder.decode(bytes)
1024 }
1025 #[cfg(not(feature = "encoding"))]
1026 {
1027 decoder.decode(bytes)?.into()
1028 }
1029 }
1030 Cow::Owned(bytes) => {
1031 #[cfg(feature = "encoding")]
1032 let decoded = decoder.decode(bytes).into_owned();
1033
1034 #[cfg(not(feature = "encoding"))]
1035 let decoded = decoder.decode(bytes)?.to_string();
1036
1037 decoded.into()
1038 }
1039 })
1040 }
1041}
1042
1043impl<'a> std::fmt::Debug for BytesCData<'a> {
1044 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1045 write!(f, "BytesCData {{ content: ")?;
1046 write_cow_string(f, &self.content)?;
1047 write!(f, " }}")
1048 }
1049}
1050
1051////////////////////////////////////////////////////////////////////////////////////////////////////
1052
/// Event emitted by [`Reader::read_event`].
///
/// Every variant except [`Eof`](Event::Eof) wraps one of the `Bytes*`
/// containers. An `Event` dereferences to the bytes of the wrapped
/// container (an empty slice for `Eof`), and [`Event::into_owned`] detaches
/// it from the lifetime `'a`.
///
/// [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    ///
    /// Carries the same payload type as [`Start`](Event::Start).
    Empty(BytesStart<'a>),
    /// Character data between `Start` and `End` element.
    Text(BytesText<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// CData `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesText<'a>),
    /// Doctype `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document. Carries no data.
    Eof,
}
1079
1080impl<'a> Event<'a> {
1081 /// Converts the event to an owned version, untied to the lifetime of
1082 /// buffer used when reading but incurring a new, separate allocation.
1083 pub fn into_owned(self) -> Event<'static> {
1084 match self {
1085 Event::Start(e) => Event::Start(e.into_owned()),
1086 Event::End(e) => Event::End(e.into_owned()),
1087 Event::Empty(e) => Event::Empty(e.into_owned()),
1088 Event::Text(e) => Event::Text(e.into_owned()),
1089 Event::Comment(e) => Event::Comment(e.into_owned()),
1090 Event::CData(e) => Event::CData(e.into_owned()),
1091 Event::Decl(e) => Event::Decl(e.into_owned()),
1092 Event::PI(e) => Event::PI(e.into_owned()),
1093 Event::DocType(e) => Event::DocType(e.into_owned()),
1094 Event::Eof => Event::Eof,
1095 }
1096 }
1097}
1098
1099////////////////////////////////////////////////////////////////////////////////////////////////////
1100
1101impl<'a> Deref for BytesStart<'a> {
1102 type Target = [u8];
1103 fn deref(&self) -> &[u8] {
1104 &*self.buf
1105 }
1106}
1107
1108impl<'a> Deref for BytesDecl<'a> {
1109 type Target = [u8];
1110 fn deref(&self) -> &[u8] {
1111 &*self.element
1112 }
1113}
1114
1115impl<'a> Deref for BytesEnd<'a> {
1116 type Target = [u8];
1117 fn deref(&self) -> &[u8] {
1118 &*self.name
1119 }
1120}
1121
1122impl<'a> Deref for BytesText<'a> {
1123 type Target = [u8];
1124 fn deref(&self) -> &[u8] {
1125 &*self.content
1126 }
1127}
1128
1129impl<'a> Deref for BytesCData<'a> {
1130 type Target = [u8];
1131
1132 fn deref(&self) -> &[u8] {
1133 &*self.content
1134 }
1135}
1136
1137impl<'a> Deref for Event<'a> {
1138 type Target = [u8];
1139 fn deref(&self) -> &[u8] {
1140 match *self {
1141 Event::Start(ref e) | Event::Empty(ref e) => &*e,
1142 Event::End(ref e) => &*e,
1143 Event::Text(ref e) => &*e,
1144 Event::Decl(ref e) => &*e,
1145 Event::PI(ref e) => &*e,
1146 Event::CData(ref e) => &*e,
1147 Event::Comment(ref e) => &*e,
1148 Event::DocType(ref e) => &*e,
1149 Event::Eof => &[],
1150 }
1151 }
1152}
1153
/// Identity conversion: lets `Event` itself (and, through the standard
/// library's blanket impls, references to it) satisfy an
/// `AsRef<Event<'a>>` bound.
impl<'a> AsRef<Event<'a>> for Event<'a> {
    fn as_ref(&self) -> &Event<'a> {
        // No conversion is needed; the event is returned as is.
        self
    }
}
1159
1160////////////////////////////////////////////////////////////////////////////////////////////////////
1161
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// `local_name()` of start and end tags must drop everything up to and
    /// including the first `:` of the qualified name.
    #[test]
    fn local_name() {
        use std::str::from_utf8;
        let xml = r#"
            <foo:bus attr='bar'>foobusbar</foo:bus>
            <foo: attr='bar'>foobusbar</foo:>
            <:foo attr='bar'>foobusbar</:foo>
            <foo:bus:baz attr='bar'>foobusbar</foo:bus:baz>
            "#;
        let mut reader = Reader::from_str(xml);
        let mut buf = Vec::new();
        let mut seen: Vec<String> = Vec::new();

        // Records one local name, in document order.
        let mut record = |raw: &[u8]| {
            let name = from_utf8(raw).expect("unable to build str from local_name");
            seen.push(name.to_string());
        };

        loop {
            match reader
                .read_event(&mut buf)
                .expect("unable to read xml event")
            {
                Event::Start(ref e) => record(e.local_name()),
                Event::End(ref e) => record(e.local_name()),
                Event::Eof => break,
                _ => {}
            }
        }

        // One entry per start tag followed by one per matching end tag.
        let expected = ["bus", "bus", "", "", "foo", "foo", "bus:baz", "bus:baz"];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(seen[idx].as_str(), *want);
        }
    }

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let tag = BytesStart::owned_name("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), b"test");
    }

    /// Renaming a tag keeps the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut tag = BytesStart::owned_name("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), b"test");
        assert_eq!(tag.attributes_raw(), b"");

        tag.push_attribute(("x", "a"));
        assert_eq!(tag.len(), 10);
        assert_eq!(tag.attributes_raw(), b" x=\"a\"");

        tag.set_name(b"g");
        assert_eq!(tag.len(), 7);
        assert_eq!(tag.name(), b"g");
    }

    /// Clearing the attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut tag = BytesStart::owned_name("test");
        for _ in 0..2 {
            tag.push_attribute(("x", "y\"z"));
        }
        tag.clear_attributes();
        assert!(tag.attributes().next().is_none());
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), b"test");
    }
}