quick_xml/reader/ns_reader.rs
//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
3//!
4//! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
5//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
6
7use std::fs::File;
8use std::io::{BufRead, BufReader};
9use std::ops::Deref;
10use std::path::Path;
11
12use crate::errors::Result;
13use crate::events::{BytesText, Event};
14use crate::name::{NamespaceResolver, QName, ResolveResult};
15use crate::reader::{Config, Reader, Span, XmlSource};
16
17/// A low level encoding-agnostic XML event reader that performs namespace resolution.
18///
19/// Consumes a [`BufRead`] and streams XML `Event`s.
20#[derive(Debug, Clone)]
21pub struct NsReader<R> {
22 /// An XML reader
23 pub(super) reader: Reader<R>,
24 /// A buffer to manage namespaces
25 pub(super) ns_resolver: NamespaceResolver,
26 /// We cannot pop data from the namespace stack until returned `Empty` or `End`
27 /// event will be processed by the user, so we only mark that we should that
28 /// in the next [`Self::read_event_impl()`] call.
29 pending_pop: bool,
30}
31
32/// Builder methods
33impl<R> NsReader<R> {
34 /// Creates a `NsReader` that reads from a reader.
35 #[inline]
36 pub fn from_reader(reader: R) -> Self {
37 Self::new(Reader::from_reader(reader))
38 }
39
40 /// Returns reference to the parser configuration
41 #[inline]
42 pub const fn config(&self) -> &Config {
43 self.reader.config()
44 }
45
46 /// Returns mutable reference to the parser configuration
47 #[inline]
48 pub fn config_mut(&mut self) -> &mut Config {
49 self.reader.config_mut()
50 }
51}
52
53/// Private methods
54impl<R> NsReader<R> {
55 #[inline]
56 fn new(reader: Reader<R>) -> Self {
57 Self {
58 reader,
59 ns_resolver: NamespaceResolver::default(),
60 pending_pop: false,
61 }
62 }
63
64 fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
65 where
66 R: XmlSource<'i, B>,
67 {
68 self.pop();
69 let event = self.reader.read_event_impl(buf);
70 self.process_event(event)
71 }
72
73 pub(super) fn pop(&mut self) {
74 if self.pending_pop {
75 self.ns_resolver.pop();
76 self.pending_pop = false;
77 }
78 }
79
80 pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
81 match event {
82 Ok(Event::Start(e)) => {
83 self.ns_resolver.push(&e)?;
84 Ok(Event::Start(e))
85 }
86 Ok(Event::Empty(e)) => {
87 self.ns_resolver.push(&e)?;
88 // notify next `read_event_impl()` invocation that it needs to pop this
89 // namespace scope
90 self.pending_pop = true;
91 Ok(Event::Empty(e))
92 }
93 Ok(Event::End(e)) => {
94 // notify next `read_event_impl()` invocation that it needs to pop this
95 // namespace scope
96 self.pending_pop = true;
97 Ok(Event::End(e))
98 }
99 e => e,
100 }
101 }
102}
103
104/// Getters
105impl<R> NsReader<R> {
106 /// Consumes `NsReader` returning the underlying reader
107 ///
108 /// See the [`Reader::into_inner`] for examples
109 #[inline]
110 pub fn into_inner(self) -> R {
111 self.reader.into_inner()
112 }
113
114 /// Gets a mutable reference to the underlying reader.
115 pub fn get_mut(&mut self) -> &mut R {
116 self.reader.get_mut()
117 }
118
119 /// Returns a storage of namespace bindings associated with this reader.
120 #[inline]
121 pub const fn resolver(&self) -> &NamespaceResolver {
122 &self.ns_resolver
123 }
124
125 /// Returns a mutable reference to the storage of namespace bindings
126 /// associated with this reader.
127 ///
128 /// Useful for configuring the resolver, e.g. to change the
129 /// [per-element namespace-declaration limit](NamespaceResolver::set_max_declarations_per_element).
130 #[inline]
131 pub fn resolver_mut(&mut self) -> &mut NamespaceResolver {
132 &mut self.ns_resolver
133 }
134}
135
136impl<R: BufRead> NsReader<R> {
137 /// Reads the next event into given buffer.
138 ///
139 /// This method manages namespaces but doesn't resolve them automatically.
140 /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
141 ///
142 /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
143 /// namespace as soon as you get an event.
144 ///
145 /// # Examples
146 ///
147 /// ```
148 /// # use pretty_assertions::assert_eq;
149 /// use quick_xml::events::Event;
150 /// use quick_xml::name::{Namespace, ResolveResult::*};
151 /// use quick_xml::reader::NsReader;
152 ///
153 /// let mut reader = NsReader::from_str(r#"
154 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
155 /// <y:tag2><!--Test comment-->Test</y:tag2>
156 /// <y:tag2>Test 2</y:tag2>
157 /// </x:tag1>
158 /// "#);
159 /// reader.config_mut().trim_text(true);
160 ///
161 /// let mut count = 0;
162 /// let mut buf = Vec::new();
163 /// let mut txt = Vec::new();
164 /// loop {
165 /// match reader.read_event_into(&mut buf).unwrap() {
166 /// Event::Start(e) => {
167 /// count += 1;
168 /// let (ns, local) = reader.resolver().resolve_element(e.name());
169 /// match local.as_ref() {
170 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
171 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
172 /// _ => unreachable!(),
173 /// }
174 /// }
175 /// Event::Text(e) => {
176 /// txt.push(e.decode().unwrap().into_owned())
177 /// }
178 /// Event::Eof => break,
179 /// _ => (),
180 /// }
181 /// buf.clear();
182 /// }
183 /// assert_eq!(count, 3);
184 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
185 /// ```
186 ///
187 /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
188 /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
189 #[inline]
190 pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
191 self.read_event_impl(buf)
192 }
193
194 /// Reads the next event into given buffer and resolves its namespace (if applicable).
195 ///
196 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
197 /// For all other events the concept of namespace is not defined, so
198 /// a [`ResolveResult::Unbound`] is returned.
199 ///
200 /// If you are not interested in namespaces, you can use [`read_event_into()`]
201 /// which will not automatically resolve namespaces for you.
202 ///
203 /// # Examples
204 ///
205 /// ```
206 /// # use pretty_assertions::assert_eq;
207 /// use quick_xml::events::Event;
208 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
209 /// use quick_xml::reader::NsReader;
210 ///
211 /// let mut reader = NsReader::from_str(r#"
212 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
213 /// <y:tag2><!--Test comment-->Test</y:tag2>
214 /// <y:tag2>Test 2</y:tag2>
215 /// </x:tag1>
216 /// "#);
217 /// reader.config_mut().trim_text(true);
218 ///
219 /// let mut count = 0;
220 /// let mut buf = Vec::new();
221 /// let mut txt = Vec::new();
222 /// loop {
223 /// match reader.read_resolved_event_into(&mut buf).unwrap() {
224 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
225 /// count += 1;
226 /// assert_eq!(e.local_name(), QName(b"tag1").into());
227 /// }
228 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
229 /// count += 1;
230 /// assert_eq!(e.local_name(), QName(b"tag2").into());
231 /// }
232 /// (_, Event::Start(_)) => unreachable!(),
233 ///
234 /// (_, Event::Text(e)) => {
235 /// txt.push(e.decode().unwrap().into_owned())
236 /// }
237 /// (_, Event::Eof) => break,
238 /// _ => (),
239 /// }
240 /// buf.clear();
241 /// }
242 /// assert_eq!(count, 3);
243 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
244 /// ```
245 ///
246 /// [`Start`]: Event::Start
247 /// [`Empty`]: Event::Empty
248 /// [`End`]: Event::End
249 /// [`read_event_into()`]: Self::read_event_into
250 #[inline]
251 pub fn read_resolved_event_into<'b>(
252 &mut self,
253 buf: &'b mut Vec<u8>,
254 ) -> Result<(ResolveResult<'_>, Event<'b>)> {
255 let event = self.read_event_impl(buf)?;
256 Ok(self.ns_resolver.resolve_event(event))
257 }
258
259 /// Reads until end element is found using provided buffer as intermediate
260 /// storage for events content. This function is supposed to be called after
261 /// you already read a [`Start`] event.
262 ///
263 /// Returns a span that cover content between `>` of an opening tag and `<` of
264 /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
265 /// this method was called after reading expanded [`Start`] event.
266 ///
267 /// Manages nested cases where parent and child elements have the _literally_
268 /// same name.
269 ///
/// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
/// will be returned. In particular, that error will be returned if you call
/// this method without consuming the corresponding [`Start`] event first.
273 ///
274 /// If your reader created from a string slice or byte array slice, it is
275 /// better to use [`read_to_end()`] method, because it will not copy bytes
276 /// into intermediate buffer.
277 ///
278 /// The provided `buf` buffer will be filled only by one event content at time.
279 /// Before reading of each event the buffer will be cleared. If you know an
280 /// appropriate size of each event, you can preallocate the buffer to reduce
281 /// number of reallocations.
282 ///
283 /// The `end` parameter should contain name of the end element _in the reader
284 /// encoding_. It is good practice to always get that parameter using
285 /// [`BytesStart::to_end()`] method.
286 ///
287 /// # Namespaces
288 ///
/// While the `NsReader` does namespace resolution, namespaces do not
/// change the algorithm for comparing names. Although the names `a:name`
/// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
/// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
/// according to [the specification]
294 ///
295 /// > The end of every element that begins with a **start-tag** MUST be marked
296 /// > by an **end-tag** containing a name that echoes the element's type as
297 /// > given in the **start-tag**
298 ///
299 /// # Examples
300 ///
301 /// This example shows, how you can skip XML content after you read the
302 /// start event.
303 ///
304 /// ```
305 /// # use pretty_assertions::assert_eq;
306 /// use quick_xml::events::{BytesStart, Event};
307 /// use quick_xml::name::{Namespace, ResolveResult};
308 /// use quick_xml::reader::NsReader;
309 ///
310 /// let mut reader = NsReader::from_str(r#"
311 /// <outer xmlns="namespace 1">
312 /// <inner xmlns="namespace 2">
313 /// <outer></outer>
314 /// </inner>
315 /// <inner>
316 /// <inner></inner>
317 /// <inner/>
318 /// <outer></outer>
319 /// <p:outer xmlns:p="ns"></p:outer>
320 /// <outer/>
321 /// </inner>
322 /// </outer>
323 /// "#);
324 /// reader.config_mut().trim_text(true);
325 /// let mut buf = Vec::new();
326 ///
327 /// let ns = Namespace(b"namespace 1");
328 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
329 /// let end = start.to_end().into_owned();
330 ///
331 /// // First, we read a start event...
332 /// assert_eq!(
333 /// reader.read_resolved_event_into(&mut buf).unwrap(),
334 /// (ResolveResult::Bound(ns), Event::Start(start))
335 /// );
336 ///
337 /// // ...then, we could skip all events to the corresponding end event.
338 /// // This call will correctly handle nested <outer> elements.
339 /// // Note, however, that this method does not handle namespaces.
340 /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
341 ///
342 /// // At the end we should get an Eof event, because we ate the whole XML
343 /// assert_eq!(
344 /// reader.read_resolved_event_into(&mut buf).unwrap(),
345 /// (ResolveResult::Unbound, Event::Eof)
346 /// );
347 /// ```
348 ///
349 /// [`Start`]: Event::Start
350 /// [`End`]: Event::End
351 /// [`IllFormed`]: crate::errors::Error::IllFormed
352 /// [`read_to_end()`]: Self::read_to_end
353 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
354 /// [`expand_empty_elements`]: Config::expand_empty_elements
355 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
356 #[inline]
357 pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
358 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
359 // match literally the start name. See `Config::check_end_names` documentation
360 let result = self.reader.read_to_end_into(end, buf)?;
361 // read_to_end_into will consume closing tag. Because nobody can access to its
362 // content anymore, we directly pop namespace of the opening tag
363 self.ns_resolver.pop();
364 Ok(result)
365 }
366
367 /// Reads content between start and end tags, including any markup using
368 /// provided buffer as intermediate storage for events content. This function
369 /// is supposed to be called after you already read a [`Start`] event.
370 ///
371 /// Manages nested cases where parent and child elements have the _literally_
372 /// same name.
373 ///
374 /// This method does not unescape read data, instead it returns content
375 /// "as is" of the XML document. This is because it has no idea what text
376 /// it reads, and if, for example, it contains CDATA section, attempt to
377 /// unescape it content will spoil data.
378 ///
379 /// If your reader created from a string slice or byte array slice, it is
380 /// better to use [`read_text()`] method, because it will not copy bytes
381 /// into intermediate buffer.
382 ///
383 /// # Examples
384 ///
385 /// This example shows, how you can read a HTML content from your XML document.
386 ///
387 /// ```
388 /// # use pretty_assertions::assert_eq;
389 /// # use std::borrow::Cow;
390 /// use quick_xml::events::{BytesStart, Event};
391 /// use quick_xml::reader::NsReader;
392 ///
393 /// let mut reader = NsReader::from_reader("
394 /// <html>
395 /// <title>This is a HTML text</title>
396 /// <p>Usual XML rules does not apply inside it
397 /// <p>For example, elements not needed to be "closed"
398 /// </html>
399 /// ".as_bytes());
400 /// reader.config_mut().trim_text(true);
401 ///
402 /// let start = BytesStart::new("html");
403 /// let end = start.to_end().into_owned();
404 ///
405 /// let mut buf = Vec::new();
406 ///
407 /// // First, we read a start event...
408 /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start));
409 /// // ...and disable checking of end names because we expect HTML further...
410 /// reader.config_mut().check_end_names = false;
411 ///
412 /// // ...then, we could read text content until close tag.
413 /// // This call will correctly handle nested <html> elements.
414 /// let text = reader.read_text_into(end.name(), &mut buf).unwrap();
415 /// let text = text.decode().unwrap();
416 /// assert_eq!(text, r#"
417 /// <title>This is a HTML text</title>
418 /// <p>Usual XML rules does not apply inside it
419 /// <p>For example, elements not needed to be "closed"
420 /// "#);
421 /// assert!(matches!(text, Cow::Borrowed(_)));
422 ///
423 /// // Now we can enable checks again
424 /// reader.config_mut().check_end_names = true;
425 ///
426 /// // At the end we should get an Eof event, because we ate the whole XML
427 /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
428 /// ```
429 ///
430 /// [`Start`]: Event::Start
431 /// [`read_text()`]: Self::read_text()
432 #[inline]
433 pub fn read_text_into<'b>(
434 &mut self,
435 end: QName,
436 buf: &'b mut Vec<u8>,
437 ) -> Result<BytesText<'b>> {
438 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
439 // match literally the start name. See `Self::check_end_names` documentation
440 let result = self.reader.read_text_into(end, buf)?;
441 // read_text_into will consume closing tag. Because nobody can access to its
442 // content anymore, we directly pop namespace of the opening tag
443 self.ns_resolver.pop();
444 Ok(result)
445 }
446}
447
448impl NsReader<BufReader<File>> {
449 /// Creates an XML reader from a file path.
450 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
451 Ok(Self::new(Reader::from_file(path)?))
452 }
453}
454
455impl<'i> NsReader<&'i [u8]> {
456 /// Creates an XML reader from a string slice.
457 #[inline]
458 #[allow(clippy::should_implement_trait)]
459 pub fn from_str(s: &'i str) -> Self {
460 Self::new(Reader::from_str(s))
461 }
462
463 /// Reads the next event, borrow its content from the input buffer.
464 ///
465 /// This method manages namespaces but doesn't resolve them automatically.
466 /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
467 ///
468 /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
469 /// as soon as you get an event.
470 ///
471 /// There is no asynchronous `read_event_async()` version of this function,
472 /// because it is not necessary -- the contents are already in memory and no IO
473 /// is needed, therefore there is no potential for blocking.
474 ///
475 /// # Examples
476 ///
477 /// ```
478 /// # use pretty_assertions::assert_eq;
479 /// use quick_xml::events::Event;
480 /// use quick_xml::name::{Namespace, ResolveResult::*};
481 /// use quick_xml::reader::NsReader;
482 ///
483 /// let mut reader = NsReader::from_str(r#"
484 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
485 /// <y:tag2><!--Test comment-->Test</y:tag2>
486 /// <y:tag2>Test 2</y:tag2>
487 /// </x:tag1>
488 /// "#);
489 /// reader.config_mut().trim_text(true);
490 ///
491 /// let mut count = 0;
492 /// let mut txt = Vec::new();
493 /// loop {
494 /// match reader.read_event().unwrap() {
495 /// Event::Start(e) => {
496 /// count += 1;
497 /// let (ns, local) = reader.resolver().resolve_element(e.name());
498 /// match local.as_ref() {
499 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
500 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
501 /// _ => unreachable!(),
502 /// }
503 /// }
504 /// Event::Text(e) => {
505 /// txt.push(e.decode().unwrap().into_owned())
506 /// }
507 /// Event::Eof => break,
508 /// _ => (),
509 /// }
510 /// }
511 /// assert_eq!(count, 3);
512 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
513 /// ```
514 ///
515 /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
516 /// [`read_resolved_event()`]: Self::read_resolved_event
517 #[inline]
518 pub fn read_event(&mut self) -> Result<Event<'i>> {
519 self.read_event_impl(())
520 }
521
522 /// Reads the next event, borrow its content from the input buffer, and resolves
523 /// its namespace (if applicable).
524 ///
525 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
526 /// For all other events the concept of namespace is not defined, so
527 /// a [`ResolveResult::Unbound`] is returned.
528 ///
529 /// If you are not interested in namespaces, you can use [`read_event()`]
530 /// which will not automatically resolve namespaces for you.
531 ///
532 /// There is no asynchronous `read_resolved_event_async()` version of this function,
533 /// because it is not necessary -- the contents are already in memory and no IO
534 /// is needed, therefore there is no potential for blocking.
535 ///
536 /// # Examples
537 ///
538 /// ```
539 /// # use pretty_assertions::assert_eq;
540 /// use quick_xml::events::Event;
541 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
542 /// use quick_xml::reader::NsReader;
543 ///
544 /// let mut reader = NsReader::from_str(r#"
545 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
546 /// <y:tag2><!--Test comment-->Test</y:tag2>
547 /// <y:tag2>Test 2</y:tag2>
548 /// </x:tag1>
549 /// "#);
550 /// reader.config_mut().trim_text(true);
551 ///
552 /// let mut count = 0;
553 /// let mut txt = Vec::new();
554 /// loop {
555 /// match reader.read_resolved_event().unwrap() {
556 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
557 /// count += 1;
558 /// assert_eq!(e.local_name(), QName(b"tag1").into());
559 /// }
560 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
561 /// count += 1;
562 /// assert_eq!(e.local_name(), QName(b"tag2").into());
563 /// }
564 /// (_, Event::Start(_)) => unreachable!(),
565 ///
566 /// (_, Event::Text(e)) => {
567 /// txt.push(e.decode().unwrap().into_owned())
568 /// }
569 /// (_, Event::Eof) => break,
570 /// _ => (),
571 /// }
572 /// }
573 /// assert_eq!(count, 3);
574 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
575 /// ```
576 ///
577 /// [`Start`]: Event::Start
578 /// [`Empty`]: Event::Empty
579 /// [`End`]: Event::End
580 /// [`read_event()`]: Self::read_event
581 #[inline]
582 pub fn read_resolved_event(&mut self) -> Result<(ResolveResult<'_>, Event<'i>)> {
583 let event = self.read_event_impl(())?;
584 Ok(self.ns_resolver.resolve_event(event))
585 }
586
587 /// Reads until end element is found. This function is supposed to be called
588 /// after you already read a [`Start`] event.
589 ///
590 /// Returns a span that cover content between `>` of an opening tag and `<` of
591 /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
592 /// this method was called after reading expanded [`Start`] event.
593 ///
594 /// Manages nested cases where parent and child elements have the _literally_
595 /// same name.
596 ///
/// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
/// will be returned. In particular, that error will be returned if you call
/// this method without consuming the corresponding [`Start`] event first.
600 ///
601 /// The `end` parameter should contain name of the end element _in the reader
602 /// encoding_. It is good practice to always get that parameter using
603 /// [`BytesStart::to_end()`] method.
604 ///
605 /// There is no asynchronous `read_to_end_async()` version of this function,
606 /// because it is not necessary -- the contents are already in memory and no IO
607 /// is needed, therefore there is no potential for blocking.
608 ///
609 /// # Namespaces
610 ///
/// While the `NsReader` does namespace resolution, namespaces do not
/// change the algorithm for comparing names. Although the names `a:name`
/// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
/// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
/// according to [the specification]
616 ///
617 /// > The end of every element that begins with a **start-tag** MUST be marked
618 /// > by an **end-tag** containing a name that echoes the element's type as
619 /// > given in the **start-tag**
620 ///
621 /// # Examples
622 ///
623 /// This example shows, how you can skip XML content after you read the
624 /// start event.
625 ///
626 /// ```
627 /// # use pretty_assertions::assert_eq;
628 /// use quick_xml::events::{BytesStart, Event};
629 /// use quick_xml::name::{Namespace, ResolveResult};
630 /// use quick_xml::reader::NsReader;
631 ///
632 /// let mut reader = NsReader::from_str(r#"
633 /// <outer xmlns="namespace 1">
634 /// <inner xmlns="namespace 2">
635 /// <outer></outer>
636 /// </inner>
637 /// <inner>
638 /// <inner></inner>
639 /// <inner/>
640 /// <outer></outer>
641 /// <p:outer xmlns:p="ns"></p:outer>
642 /// <outer/>
643 /// </inner>
644 /// </outer>
645 /// "#);
646 /// reader.config_mut().trim_text(true);
647 ///
648 /// let ns = Namespace(b"namespace 1");
649 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
650 /// let end = start.to_end().into_owned();
651 ///
652 /// // First, we read a start event...
653 /// assert_eq!(
654 /// reader.read_resolved_event().unwrap(),
655 /// (ResolveResult::Bound(ns), Event::Start(start))
656 /// );
657 ///
658 /// // ...then, we could skip all events to the corresponding end event.
659 /// // This call will correctly handle nested <outer> elements.
660 /// // Note, however, that this method does not handle namespaces.
661 /// reader.read_to_end(end.name()).unwrap();
662 ///
663 /// // At the end we should get an Eof event, because we ate the whole XML
664 /// assert_eq!(
665 /// reader.read_resolved_event().unwrap(),
666 /// (ResolveResult::Unbound, Event::Eof)
667 /// );
668 /// ```
669 ///
670 /// [`Start`]: Event::Start
671 /// [`End`]: Event::End
672 /// [`IllFormed`]: crate::errors::Error::IllFormed
673 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
674 /// [`expand_empty_elements`]: Config::expand_empty_elements
675 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
676 #[inline]
677 pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
678 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
679 // match literally the start name. See `Config::check_end_names` documentation
680 let result = self.reader.read_to_end(end)?;
681 // read_to_end will consume closing tag. Because nobody can access to its
682 // content anymore, we directly pop namespace of the opening tag
683 self.ns_resolver.pop();
684 Ok(result)
685 }
686
687 /// Reads content between start and end tags, including any markup. This
688 /// function is supposed to be called after you already read a [`Start`] event.
689 ///
690 /// Manages nested cases where parent and child elements have the _literally_
691 /// same name.
692 ///
693 /// This method does not unescape read data, instead it returns content
694 /// "as is" of the XML document. This is because it has no idea what text
695 /// it reads, and if, for example, it contains CDATA section, attempt to
696 /// unescape it content will spoil data.
697 ///
698 /// Any text will be decoded using the XML current [`decoder()`].
699 ///
700 /// Actually, this method perform the following code:
701 ///
702 /// ```ignore
703 /// let span = reader.read_to_end(end)?;
704 /// let text = reader.decoder().decode(&reader.inner_slice[span]);
705 /// ```
706 ///
707 /// # Examples
708 ///
709 /// This example shows, how you can read a HTML content from your XML document.
710 ///
711 /// ```
712 /// # use pretty_assertions::assert_eq;
713 /// # use std::borrow::Cow;
714 /// use quick_xml::events::{BytesStart, Event};
715 /// use quick_xml::reader::NsReader;
716 ///
717 /// let mut reader = NsReader::from_str(r#"
718 /// <html>
719 /// <title>This is a HTML text</title>
720 /// <p>Usual XML rules does not apply inside it
721 /// <p>For example, elements not needed to be "closed"
722 /// </html>
723 /// "#);
724 /// reader.config_mut().trim_text(true);
725 ///
726 /// let start = BytesStart::new("html");
727 /// let end = start.to_end().into_owned();
728 ///
729 /// // First, we read a start event...
730 /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
731 /// // ...and disable checking of end names because we expect HTML further...
732 /// reader.config_mut().check_end_names = false;
733 ///
734 /// // ...then, we could read text content until close tag.
735 /// // This call will correctly handle nested <html> elements.
736 /// let text = reader.read_text(end.name()).unwrap();
737 /// let text = text.decode().unwrap();
738 /// assert_eq!(text, r#"
739 /// <title>This is a HTML text</title>
740 /// <p>Usual XML rules does not apply inside it
741 /// <p>For example, elements not needed to be "closed"
742 /// "#);
743 /// assert!(matches!(text, Cow::Borrowed(_)));
744 ///
745 /// // Now we can enable checks again
746 /// reader.config_mut().check_end_names = true;
747 ///
748 /// // At the end we should get an Eof event, because we ate the whole XML
749 /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
750 /// ```
751 ///
752 /// [`Start`]: Event::Start
753 /// [`decoder()`]: Reader::decoder()
754 #[inline]
755 pub fn read_text(&mut self, end: QName) -> Result<BytesText<'i>> {
756 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
757 // match literally the start name. See `Self::check_end_names` documentation
758 let result = self.reader.read_text(end)?;
759 // read_text will consume closing tag. Because nobody can access to its
760 // content anymore, we directly pop namespace of the opening tag
761 self.ns_resolver.pop();
762 Ok(result)
763 }
764}
765
766impl<R> Deref for NsReader<R> {
767 type Target = Reader<R>;
768
769 #[inline]
770 fn deref(&self) -> &Self::Target {
771 &self.reader
772 }
773}