quick_xml/reader/
ns_reader.rs

//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
3//!
4//! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
5//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
6
7use std::fs::File;
8use std::io::{BufRead, BufReader};
9use std::ops::Deref;
10use std::path::Path;
11
12use crate::errors::Result;
13use crate::events::{BytesText, Event};
14use crate::name::{NamespaceResolver, QName, ResolveResult};
15use crate::reader::{Config, Reader, Span, XmlSource};
16
17/// A low level encoding-agnostic XML event reader that performs namespace resolution.
18///
19/// Consumes a [`BufRead`] and streams XML `Event`s.
20#[derive(Debug, Clone)]
21pub struct NsReader<R> {
22    /// An XML reader
23    pub(super) reader: Reader<R>,
24    /// A buffer to manage namespaces
25    pub(super) ns_resolver: NamespaceResolver,
26    /// We cannot pop data from the namespace stack until returned `Empty` or `End`
27    /// event will be processed by the user, so we only mark that we should that
28    /// in the next [`Self::read_event_impl()`] call.
29    pending_pop: bool,
30}
31
32/// Builder methods
33impl<R> NsReader<R> {
34    /// Creates a `NsReader` that reads from a reader.
35    #[inline]
36    pub fn from_reader(reader: R) -> Self {
37        Self::new(Reader::from_reader(reader))
38    }
39
40    /// Returns reference to the parser configuration
41    #[inline]
42    pub const fn config(&self) -> &Config {
43        self.reader.config()
44    }
45
46    /// Returns mutable reference to the parser configuration
47    #[inline]
48    pub fn config_mut(&mut self) -> &mut Config {
49        self.reader.config_mut()
50    }
51}
52
53/// Private methods
54impl<R> NsReader<R> {
55    #[inline]
56    fn new(reader: Reader<R>) -> Self {
57        Self {
58            reader,
59            ns_resolver: NamespaceResolver::default(),
60            pending_pop: false,
61        }
62    }
63
64    fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
65    where
66        R: XmlSource<'i, B>,
67    {
68        self.pop();
69        let event = self.reader.read_event_impl(buf);
70        self.process_event(event)
71    }
72
73    pub(super) fn pop(&mut self) {
74        if self.pending_pop {
75            self.ns_resolver.pop();
76            self.pending_pop = false;
77        }
78    }
79
80    pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
81        match event {
82            Ok(Event::Start(e)) => {
83                self.ns_resolver.push(&e)?;
84                Ok(Event::Start(e))
85            }
86            Ok(Event::Empty(e)) => {
87                self.ns_resolver.push(&e)?;
88                // notify next `read_event_impl()` invocation that it needs to pop this
89                // namespace scope
90                self.pending_pop = true;
91                Ok(Event::Empty(e))
92            }
93            Ok(Event::End(e)) => {
94                // notify next `read_event_impl()` invocation that it needs to pop this
95                // namespace scope
96                self.pending_pop = true;
97                Ok(Event::End(e))
98            }
99            e => e,
100        }
101    }
102}
103
104/// Getters
105impl<R> NsReader<R> {
106    /// Consumes `NsReader` returning the underlying reader
107    ///
108    /// See the [`Reader::into_inner`] for examples
109    #[inline]
110    pub fn into_inner(self) -> R {
111        self.reader.into_inner()
112    }
113
114    /// Gets a mutable reference to the underlying reader.
115    pub fn get_mut(&mut self) -> &mut R {
116        self.reader.get_mut()
117    }
118
119    /// Returns a storage of namespace bindings associated with this reader.
120    #[inline]
121    pub const fn resolver(&self) -> &NamespaceResolver {
122        &self.ns_resolver
123    }
124
125    /// Returns a mutable reference to the storage of namespace bindings
126    /// associated with this reader.
127    ///
128    /// Useful for configuring the resolver, e.g. to change the
129    /// [per-element namespace-declaration limit](NamespaceResolver::set_max_declarations_per_element).
130    #[inline]
131    pub fn resolver_mut(&mut self) -> &mut NamespaceResolver {
132        &mut self.ns_resolver
133    }
134}
135
136impl<R: BufRead> NsReader<R> {
137    /// Reads the next event into given buffer.
138    ///
139    /// This method manages namespaces but doesn't resolve them automatically.
140    /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
141    ///
142    /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
143    /// namespace as soon as you get an event.
144    ///
145    /// # Examples
146    ///
147    /// ```
148    /// # use pretty_assertions::assert_eq;
149    /// use quick_xml::events::Event;
150    /// use quick_xml::name::{Namespace, ResolveResult::*};
151    /// use quick_xml::reader::NsReader;
152    ///
153    /// let mut reader = NsReader::from_str(r#"
154    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
155    ///        <y:tag2><!--Test comment-->Test</y:tag2>
156    ///        <y:tag2>Test 2</y:tag2>
157    ///     </x:tag1>
158    /// "#);
159    /// reader.config_mut().trim_text(true);
160    ///
161    /// let mut count = 0;
162    /// let mut buf = Vec::new();
163    /// let mut txt = Vec::new();
164    /// loop {
165    ///     match reader.read_event_into(&mut buf).unwrap() {
166    ///         Event::Start(e) => {
167    ///             count += 1;
168    ///             let (ns, local) = reader.resolver().resolve_element(e.name());
169    ///             match local.as_ref() {
170    ///                 b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
171    ///                 b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
172    ///                 _ => unreachable!(),
173    ///             }
174    ///         }
175    ///         Event::Text(e) => {
176    ///             txt.push(e.decode().unwrap().into_owned())
177    ///         }
178    ///         Event::Eof => break,
179    ///         _ => (),
180    ///     }
181    ///     buf.clear();
182    /// }
183    /// assert_eq!(count, 3);
184    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
185    /// ```
186    ///
187    /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
188    /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
189    #[inline]
190    pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
191        self.read_event_impl(buf)
192    }
193
194    /// Reads the next event into given buffer and resolves its namespace (if applicable).
195    ///
196    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
197    /// For all other events the concept of namespace is not defined, so
198    /// a [`ResolveResult::Unbound`] is returned.
199    ///
200    /// If you are not interested in namespaces, you can use [`read_event_into()`]
201    /// which will not automatically resolve namespaces for you.
202    ///
203    /// # Examples
204    ///
205    /// ```
206    /// # use pretty_assertions::assert_eq;
207    /// use quick_xml::events::Event;
208    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
209    /// use quick_xml::reader::NsReader;
210    ///
211    /// let mut reader = NsReader::from_str(r#"
212    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
213    ///        <y:tag2><!--Test comment-->Test</y:tag2>
214    ///        <y:tag2>Test 2</y:tag2>
215    ///     </x:tag1>
216    /// "#);
217    /// reader.config_mut().trim_text(true);
218    ///
219    /// let mut count = 0;
220    /// let mut buf = Vec::new();
221    /// let mut txt = Vec::new();
222    /// loop {
223    ///     match reader.read_resolved_event_into(&mut buf).unwrap() {
224    ///         (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
225    ///             count += 1;
226    ///             assert_eq!(e.local_name(), QName(b"tag1").into());
227    ///         }
228    ///         (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
229    ///             count += 1;
230    ///             assert_eq!(e.local_name(), QName(b"tag2").into());
231    ///         }
232    ///         (_, Event::Start(_)) => unreachable!(),
233    ///
234    ///         (_, Event::Text(e)) => {
235    ///             txt.push(e.decode().unwrap().into_owned())
236    ///         }
237    ///         (_, Event::Eof) => break,
238    ///         _ => (),
239    ///     }
240    ///     buf.clear();
241    /// }
242    /// assert_eq!(count, 3);
243    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
244    /// ```
245    ///
246    /// [`Start`]: Event::Start
247    /// [`Empty`]: Event::Empty
248    /// [`End`]: Event::End
249    /// [`read_event_into()`]: Self::read_event_into
250    #[inline]
251    pub fn read_resolved_event_into<'b>(
252        &mut self,
253        buf: &'b mut Vec<u8>,
254    ) -> Result<(ResolveResult<'_>, Event<'b>)> {
255        let event = self.read_event_impl(buf)?;
256        Ok(self.ns_resolver.resolve_event(event))
257    }
258
259    /// Reads until end element is found using provided buffer as intermediate
260    /// storage for events content. This function is supposed to be called after
261    /// you already read a [`Start`] event.
262    ///
263    /// Returns a span that cover content between `>` of an opening tag and `<` of
264    /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
265    /// this method was called after reading expanded [`Start`] event.
266    ///
267    /// Manages nested cases where parent and child elements have the _literally_
268    /// same name.
269    ///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
273    ///
    /// If your reader is created from a string slice or byte array slice, it is
    /// better to use [`read_to_end()`] method, because it will not copy bytes
    /// into intermediate buffer.
277    ///
278    /// The provided `buf` buffer will be filled only by one event content at time.
279    /// Before reading of each event the buffer will be cleared. If you know an
280    /// appropriate size of each event, you can preallocate the buffer to reduce
281    /// number of reallocations.
282    ///
283    /// The `end` parameter should contain name of the end element _in the reader
284    /// encoding_. It is good practice to always get that parameter using
285    /// [`BytesStart::to_end()`] method.
286    ///
287    /// # Namespaces
288    ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
294    ///
295    /// > The end of every element that begins with a **start-tag** MUST be marked
296    /// > by an **end-tag** containing a name that echoes the element's type as
297    /// > given in the **start-tag**
298    ///
299    /// # Examples
300    ///
301    /// This example shows, how you can skip XML content after you read the
302    /// start event.
303    ///
304    /// ```
305    /// # use pretty_assertions::assert_eq;
306    /// use quick_xml::events::{BytesStart, Event};
307    /// use quick_xml::name::{Namespace, ResolveResult};
308    /// use quick_xml::reader::NsReader;
309    ///
310    /// let mut reader = NsReader::from_str(r#"
311    ///     <outer xmlns="namespace 1">
312    ///         <inner xmlns="namespace 2">
313    ///             <outer></outer>
314    ///         </inner>
315    ///         <inner>
316    ///             <inner></inner>
317    ///             <inner/>
318    ///             <outer></outer>
319    ///             <p:outer xmlns:p="ns"></p:outer>
320    ///             <outer/>
321    ///         </inner>
322    ///     </outer>
323    /// "#);
324    /// reader.config_mut().trim_text(true);
325    /// let mut buf = Vec::new();
326    ///
327    /// let ns = Namespace(b"namespace 1");
328    /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
329    /// let end   = start.to_end().into_owned();
330    ///
331    /// // First, we read a start event...
332    /// assert_eq!(
333    ///     reader.read_resolved_event_into(&mut buf).unwrap(),
334    ///     (ResolveResult::Bound(ns), Event::Start(start))
335    /// );
336    ///
337    /// // ...then, we could skip all events to the corresponding end event.
338    /// // This call will correctly handle nested <outer> elements.
339    /// // Note, however, that this method does not handle namespaces.
340    /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
341    ///
342    /// // At the end we should get an Eof event, because we ate the whole XML
343    /// assert_eq!(
344    ///     reader.read_resolved_event_into(&mut buf).unwrap(),
345    ///     (ResolveResult::Unbound, Event::Eof)
346    /// );
347    /// ```
348    ///
349    /// [`Start`]: Event::Start
350    /// [`End`]: Event::End
351    /// [`IllFormed`]: crate::errors::Error::IllFormed
352    /// [`read_to_end()`]: Self::read_to_end
353    /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
354    /// [`expand_empty_elements`]: Config::expand_empty_elements
355    /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
356    #[inline]
357    pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
358        // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
359        // match literally the start name. See `Config::check_end_names` documentation
360        let result = self.reader.read_to_end_into(end, buf)?;
361        // read_to_end_into will consume closing tag. Because nobody can access to its
362        // content anymore, we directly pop namespace of the opening tag
363        self.ns_resolver.pop();
364        Ok(result)
365    }
366
367    /// Reads content between start and end tags, including any markup using
368    /// provided buffer as intermediate storage for events content. This function
369    /// is supposed to be called after you already read a [`Start`] event.
370    ///
371    /// Manages nested cases where parent and child elements have the _literally_
372    /// same name.
373    ///
    /// This method does not unescape the data it reads; instead it returns the
    /// content of the XML document "as is". This is because it has no idea what
    /// text it reads and if, for example, the text contains a CDATA section, an
    /// attempt to unescape its content would spoil the data.
378    ///
    /// If your reader is created from a string slice or byte array slice, it is
    /// better to use [`read_text()`] method, because it will not copy bytes
    /// into intermediate buffer.
382    ///
383    /// # Examples
384    ///
385    /// This example shows, how you can read a HTML content from your XML document.
386    ///
387    /// ```
388    /// # use pretty_assertions::assert_eq;
389    /// # use std::borrow::Cow;
390    /// use quick_xml::events::{BytesStart, Event};
391    /// use quick_xml::reader::NsReader;
392    ///
393    /// let mut reader = NsReader::from_reader("
394    ///     <html>
395    ///         <title>This is a HTML text</title>
396    ///         <p>Usual XML rules does not apply inside it
397    ///         <p>For example, elements not needed to be &quot;closed&quot;
398    ///     </html>
399    /// ".as_bytes());
400    /// reader.config_mut().trim_text(true);
401    ///
402    /// let start = BytesStart::new("html");
403    /// let end   = start.to_end().into_owned();
404    ///
405    /// let mut buf = Vec::new();
406    ///
407    /// // First, we read a start event...
408    /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start));
409    /// // ...and disable checking of end names because we expect HTML further...
410    /// reader.config_mut().check_end_names = false;
411    ///
412    /// // ...then, we could read text content until close tag.
413    /// // This call will correctly handle nested <html> elements.
414    /// let text = reader.read_text_into(end.name(), &mut buf).unwrap();
415    /// let text = text.decode().unwrap();
416    /// assert_eq!(text, r#"
417    ///         <title>This is a HTML text</title>
418    ///         <p>Usual XML rules does not apply inside it
419    ///         <p>For example, elements not needed to be &quot;closed&quot;
420    ///     "#);
421    /// assert!(matches!(text, Cow::Borrowed(_)));
422    ///
423    /// // Now we can enable checks again
424    /// reader.config_mut().check_end_names = true;
425    ///
426    /// // At the end we should get an Eof event, because we ate the whole XML
427    /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
428    /// ```
429    ///
430    /// [`Start`]: Event::Start
431    /// [`read_text()`]: Self::read_text()
432    #[inline]
433    pub fn read_text_into<'b>(
434        &mut self,
435        end: QName,
436        buf: &'b mut Vec<u8>,
437    ) -> Result<BytesText<'b>> {
438        // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
439        // match literally the start name. See `Self::check_end_names` documentation
440        let result = self.reader.read_text_into(end, buf)?;
441        // read_text_into will consume closing tag. Because nobody can access to its
442        // content anymore, we directly pop namespace of the opening tag
443        self.ns_resolver.pop();
444        Ok(result)
445    }
446}
447
448impl NsReader<BufReader<File>> {
449    /// Creates an XML reader from a file path.
450    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
451        Ok(Self::new(Reader::from_file(path)?))
452    }
453}
454
455impl<'i> NsReader<&'i [u8]> {
456    /// Creates an XML reader from a string slice.
457    #[inline]
458    #[allow(clippy::should_implement_trait)]
459    pub fn from_str(s: &'i str) -> Self {
460        Self::new(Reader::from_str(s))
461    }
462
463    /// Reads the next event, borrow its content from the input buffer.
464    ///
465    /// This method manages namespaces but doesn't resolve them automatically.
466    /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
467    ///
468    /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
469    /// as soon as you get an event.
470    ///
471    /// There is no asynchronous `read_event_async()` version of this function,
472    /// because it is not necessary -- the contents are already in memory and no IO
473    /// is needed, therefore there is no potential for blocking.
474    ///
475    /// # Examples
476    ///
477    /// ```
478    /// # use pretty_assertions::assert_eq;
479    /// use quick_xml::events::Event;
480    /// use quick_xml::name::{Namespace, ResolveResult::*};
481    /// use quick_xml::reader::NsReader;
482    ///
483    /// let mut reader = NsReader::from_str(r#"
484    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
485    ///        <y:tag2><!--Test comment-->Test</y:tag2>
486    ///        <y:tag2>Test 2</y:tag2>
487    ///     </x:tag1>
488    /// "#);
489    /// reader.config_mut().trim_text(true);
490    ///
491    /// let mut count = 0;
492    /// let mut txt = Vec::new();
493    /// loop {
494    ///     match reader.read_event().unwrap() {
495    ///         Event::Start(e) => {
496    ///             count += 1;
497    ///             let (ns, local) = reader.resolver().resolve_element(e.name());
498    ///             match local.as_ref() {
499    ///                 b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
500    ///                 b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
501    ///                 _ => unreachable!(),
502    ///             }
503    ///         }
504    ///         Event::Text(e) => {
505    ///             txt.push(e.decode().unwrap().into_owned())
506    ///         }
507    ///         Event::Eof => break,
508    ///         _ => (),
509    ///     }
510    /// }
511    /// assert_eq!(count, 3);
512    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
513    /// ```
514    ///
515    /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
516    /// [`read_resolved_event()`]: Self::read_resolved_event
517    #[inline]
518    pub fn read_event(&mut self) -> Result<Event<'i>> {
519        self.read_event_impl(())
520    }
521
522    /// Reads the next event, borrow its content from the input buffer, and resolves
523    /// its namespace (if applicable).
524    ///
525    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
526    /// For all other events the concept of namespace is not defined, so
527    /// a [`ResolveResult::Unbound`] is returned.
528    ///
529    /// If you are not interested in namespaces, you can use [`read_event()`]
530    /// which will not automatically resolve namespaces for you.
531    ///
532    /// There is no asynchronous `read_resolved_event_async()` version of this function,
533    /// because it is not necessary -- the contents are already in memory and no IO
534    /// is needed, therefore there is no potential for blocking.
535    ///
536    /// # Examples
537    ///
538    /// ```
539    /// # use pretty_assertions::assert_eq;
540    /// use quick_xml::events::Event;
541    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
542    /// use quick_xml::reader::NsReader;
543    ///
544    /// let mut reader = NsReader::from_str(r#"
545    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
546    ///        <y:tag2><!--Test comment-->Test</y:tag2>
547    ///        <y:tag2>Test 2</y:tag2>
548    ///     </x:tag1>
549    /// "#);
550    /// reader.config_mut().trim_text(true);
551    ///
552    /// let mut count = 0;
553    /// let mut txt = Vec::new();
554    /// loop {
555    ///     match reader.read_resolved_event().unwrap() {
556    ///         (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
557    ///             count += 1;
558    ///             assert_eq!(e.local_name(), QName(b"tag1").into());
559    ///         }
560    ///         (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
561    ///             count += 1;
562    ///             assert_eq!(e.local_name(), QName(b"tag2").into());
563    ///         }
564    ///         (_, Event::Start(_)) => unreachable!(),
565    ///
566    ///         (_, Event::Text(e)) => {
567    ///             txt.push(e.decode().unwrap().into_owned())
568    ///         }
569    ///         (_, Event::Eof) => break,
570    ///         _ => (),
571    ///     }
572    /// }
573    /// assert_eq!(count, 3);
574    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
575    /// ```
576    ///
577    /// [`Start`]: Event::Start
578    /// [`Empty`]: Event::Empty
579    /// [`End`]: Event::End
580    /// [`read_event()`]: Self::read_event
581    #[inline]
582    pub fn read_resolved_event(&mut self) -> Result<(ResolveResult<'_>, Event<'i>)> {
583        let event = self.read_event_impl(())?;
584        Ok(self.ns_resolver.resolve_event(event))
585    }
586
587    /// Reads until end element is found. This function is supposed to be called
588    /// after you already read a [`Start`] event.
589    ///
590    /// Returns a span that cover content between `>` of an opening tag and `<` of
591    /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
592    /// this method was called after reading expanded [`Start`] event.
593    ///
594    /// Manages nested cases where parent and child elements have the _literally_
595    /// same name.
596    ///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
600    ///
601    /// The `end` parameter should contain name of the end element _in the reader
602    /// encoding_. It is good practice to always get that parameter using
603    /// [`BytesStart::to_end()`] method.
604    ///
605    /// There is no asynchronous `read_to_end_async()` version of this function,
606    /// because it is not necessary -- the contents are already in memory and no IO
607    /// is needed, therefore there is no potential for blocking.
608    ///
609    /// # Namespaces
610    ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
616    ///
617    /// > The end of every element that begins with a **start-tag** MUST be marked
618    /// > by an **end-tag** containing a name that echoes the element's type as
619    /// > given in the **start-tag**
620    ///
621    /// # Examples
622    ///
623    /// This example shows, how you can skip XML content after you read the
624    /// start event.
625    ///
626    /// ```
627    /// # use pretty_assertions::assert_eq;
628    /// use quick_xml::events::{BytesStart, Event};
629    /// use quick_xml::name::{Namespace, ResolveResult};
630    /// use quick_xml::reader::NsReader;
631    ///
632    /// let mut reader = NsReader::from_str(r#"
633    ///     <outer xmlns="namespace 1">
634    ///         <inner xmlns="namespace 2">
635    ///             <outer></outer>
636    ///         </inner>
637    ///         <inner>
638    ///             <inner></inner>
639    ///             <inner/>
640    ///             <outer></outer>
641    ///             <p:outer xmlns:p="ns"></p:outer>
642    ///             <outer/>
643    ///         </inner>
644    ///     </outer>
645    /// "#);
646    /// reader.config_mut().trim_text(true);
647    ///
648    /// let ns = Namespace(b"namespace 1");
649    /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
650    /// let end   = start.to_end().into_owned();
651    ///
652    /// // First, we read a start event...
653    /// assert_eq!(
654    ///     reader.read_resolved_event().unwrap(),
655    ///     (ResolveResult::Bound(ns), Event::Start(start))
656    /// );
657    ///
658    /// // ...then, we could skip all events to the corresponding end event.
659    /// // This call will correctly handle nested <outer> elements.
660    /// // Note, however, that this method does not handle namespaces.
661    /// reader.read_to_end(end.name()).unwrap();
662    ///
663    /// // At the end we should get an Eof event, because we ate the whole XML
664    /// assert_eq!(
665    ///     reader.read_resolved_event().unwrap(),
666    ///     (ResolveResult::Unbound, Event::Eof)
667    /// );
668    /// ```
669    ///
670    /// [`Start`]: Event::Start
671    /// [`End`]: Event::End
672    /// [`IllFormed`]: crate::errors::Error::IllFormed
673    /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
674    /// [`expand_empty_elements`]: Config::expand_empty_elements
675    /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
676    #[inline]
677    pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
678        // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
679        // match literally the start name. See `Config::check_end_names` documentation
680        let result = self.reader.read_to_end(end)?;
681        // read_to_end will consume closing tag. Because nobody can access to its
682        // content anymore, we directly pop namespace of the opening tag
683        self.ns_resolver.pop();
684        Ok(result)
685    }
686
687    /// Reads content between start and end tags, including any markup. This
688    /// function is supposed to be called after you already read a [`Start`] event.
689    ///
690    /// Manages nested cases where parent and child elements have the _literally_
691    /// same name.
692    ///
    /// This method does not unescape the data it reads; instead it returns the
    /// content of the XML document "as is". This is because it has no idea what
    /// text it reads and if, for example, the text contains a CDATA section, an
    /// attempt to unescape its content would spoil the data.
697    ///
698    /// Any text will be decoded using the XML current [`decoder()`].
699    ///
700    /// Actually, this method perform the following code:
701    ///
702    /// ```ignore
703    /// let span = reader.read_to_end(end)?;
704    /// let text = reader.decoder().decode(&reader.inner_slice[span]);
705    /// ```
706    ///
707    /// # Examples
708    ///
709    /// This example shows, how you can read a HTML content from your XML document.
710    ///
711    /// ```
712    /// # use pretty_assertions::assert_eq;
713    /// # use std::borrow::Cow;
714    /// use quick_xml::events::{BytesStart, Event};
715    /// use quick_xml::reader::NsReader;
716    ///
717    /// let mut reader = NsReader::from_str(r#"
718    ///     <html>
719    ///         <title>This is a HTML text</title>
720    ///         <p>Usual XML rules does not apply inside it
721    ///         <p>For example, elements not needed to be &quot;closed&quot;
722    ///     </html>
723    /// "#);
724    /// reader.config_mut().trim_text(true);
725    ///
726    /// let start = BytesStart::new("html");
727    /// let end   = start.to_end().into_owned();
728    ///
729    /// // First, we read a start event...
730    /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
731    /// // ...and disable checking of end names because we expect HTML further...
732    /// reader.config_mut().check_end_names = false;
733    ///
734    /// // ...then, we could read text content until close tag.
735    /// // This call will correctly handle nested <html> elements.
736    /// let text = reader.read_text(end.name()).unwrap();
737    /// let text = text.decode().unwrap();
738    /// assert_eq!(text, r#"
739    ///         <title>This is a HTML text</title>
740    ///         <p>Usual XML rules does not apply inside it
741    ///         <p>For example, elements not needed to be &quot;closed&quot;
742    ///     "#);
743    /// assert!(matches!(text, Cow::Borrowed(_)));
744    ///
745    /// // Now we can enable checks again
746    /// reader.config_mut().check_end_names = true;
747    ///
748    /// // At the end we should get an Eof event, because we ate the whole XML
749    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
750    /// ```
751    ///
752    /// [`Start`]: Event::Start
753    /// [`decoder()`]: Reader::decoder()
754    #[inline]
755    pub fn read_text(&mut self, end: QName) -> Result<BytesText<'i>> {
756        // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
757        // match literally the start name. See `Self::check_end_names` documentation
758        let result = self.reader.read_text(end)?;
759        // read_text will consume closing tag. Because nobody can access to its
760        // content anymore, we directly pop namespace of the opening tag
761        self.ns_resolver.pop();
762        Ok(result)
763    }
764}
765
766impl<R> Deref for NsReader<R> {
767    type Target = Reader<R>;
768
769    #[inline]
770    fn deref(&self) -> &Self::Target {
771        &self.reader
772    }
773}
quick_xml/reader/ns_reader.rs

quick_xml/reader/
ns_reader.rs