quick_xml/reader/ns_reader.rs
//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
3//!
4//! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
5//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
6
7use std::fs::File;
8use std::io::{BufRead, BufReader};
9use std::ops::Deref;
10use std::path::Path;
11
12use crate::errors::Result;
13use crate::events::{BytesText, Event};
14use crate::name::{NamespaceResolver, QName, ResolveResult};
15use crate::reader::{Config, Reader, Span, XmlSource};
16
17/// A low level encoding-agnostic XML event reader that performs namespace resolution.
18///
19/// Consumes a [`BufRead`] and streams XML `Event`s.
20#[derive(Debug, Clone)]
21pub struct NsReader<R> {
22 /// An XML reader
23 pub(super) reader: Reader<R>,
24 /// A buffer to manage namespaces
25 pub(super) ns_resolver: NamespaceResolver,
26 /// We cannot pop data from the namespace stack until returned `Empty` or `End`
27 /// event will be processed by the user, so we only mark that we should that
28 /// in the next [`Self::read_event_impl()`] call.
29 pending_pop: bool,
30}
31
32/// Builder methods
33impl<R> NsReader<R> {
34 /// Creates a `NsReader` that reads from a reader.
35 #[inline]
36 pub fn from_reader(reader: R) -> Self {
37 Self::new(Reader::from_reader(reader))
38 }
39
40 /// Returns reference to the parser configuration
41 #[inline]
42 pub const fn config(&self) -> &Config {
43 self.reader.config()
44 }
45
46 /// Returns mutable reference to the parser configuration
47 #[inline]
48 pub fn config_mut(&mut self) -> &mut Config {
49 self.reader.config_mut()
50 }
51}
52
53/// Private methods
54impl<R> NsReader<R> {
55 #[inline]
56 fn new(reader: Reader<R>) -> Self {
57 Self {
58 reader,
59 ns_resolver: NamespaceResolver::default(),
60 pending_pop: false,
61 }
62 }
63
64 fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
65 where
66 R: XmlSource<'i, B>,
67 {
68 self.pop();
69 let event = self.reader.read_event_impl(buf);
70 self.process_event(event)
71 }
72
73 pub(super) fn pop(&mut self) {
74 if self.pending_pop {
75 self.ns_resolver.pop();
76 self.pending_pop = false;
77 }
78 }
79
80 pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
81 match event {
82 Ok(Event::Start(e)) => {
83 self.ns_resolver.push(&e)?;
84 Ok(Event::Start(e))
85 }
86 Ok(Event::Empty(e)) => {
87 self.ns_resolver.push(&e)?;
88 // notify next `read_event_impl()` invocation that it needs to pop this
89 // namespace scope
90 self.pending_pop = true;
91 Ok(Event::Empty(e))
92 }
93 Ok(Event::End(e)) => {
94 // notify next `read_event_impl()` invocation that it needs to pop this
95 // namespace scope
96 self.pending_pop = true;
97 Ok(Event::End(e))
98 }
99 e => e,
100 }
101 }
102}
103
104/// Getters
105impl<R> NsReader<R> {
106 /// Consumes `NsReader` returning the underlying reader
107 ///
108 /// See the [`Reader::into_inner`] for examples
109 #[inline]
110 pub fn into_inner(self) -> R {
111 self.reader.into_inner()
112 }
113
114 /// Gets a mutable reference to the underlying reader.
115 pub fn get_mut(&mut self) -> &mut R {
116 self.reader.get_mut()
117 }
118
119 /// Returns a storage of namespace bindings associated with this reader.
120 #[inline]
121 pub const fn resolver(&self) -> &NamespaceResolver {
122 &self.ns_resolver
123 }
124}
125
126impl<R: BufRead> NsReader<R> {
127 /// Reads the next event into given buffer.
128 ///
129 /// This method manages namespaces but doesn't resolve them automatically.
130 /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
131 ///
132 /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
133 /// namespace as soon as you get an event.
134 ///
135 /// # Examples
136 ///
137 /// ```
138 /// # use pretty_assertions::assert_eq;
139 /// use quick_xml::events::Event;
140 /// use quick_xml::name::{Namespace, ResolveResult::*};
141 /// use quick_xml::reader::NsReader;
142 ///
143 /// let mut reader = NsReader::from_str(r#"
144 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
145 /// <y:tag2><!--Test comment-->Test</y:tag2>
146 /// <y:tag2>Test 2</y:tag2>
147 /// </x:tag1>
148 /// "#);
149 /// reader.config_mut().trim_text(true);
150 ///
151 /// let mut count = 0;
152 /// let mut buf = Vec::new();
153 /// let mut txt = Vec::new();
154 /// loop {
155 /// match reader.read_event_into(&mut buf).unwrap() {
156 /// Event::Start(e) => {
157 /// count += 1;
158 /// let (ns, local) = reader.resolver().resolve_element(e.name());
159 /// match local.as_ref() {
160 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
161 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
162 /// _ => unreachable!(),
163 /// }
164 /// }
165 /// Event::Text(e) => {
166 /// txt.push(e.decode().unwrap().into_owned())
167 /// }
168 /// Event::Eof => break,
169 /// _ => (),
170 /// }
171 /// buf.clear();
172 /// }
173 /// assert_eq!(count, 3);
174 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
175 /// ```
176 ///
177 /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
178 /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
179 #[inline]
180 pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
181 self.read_event_impl(buf)
182 }
183
184 /// Reads the next event into given buffer and resolves its namespace (if applicable).
185 ///
186 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
187 /// For all other events the concept of namespace is not defined, so
188 /// a [`ResolveResult::Unbound`] is returned.
189 ///
190 /// If you are not interested in namespaces, you can use [`read_event_into()`]
191 /// which will not automatically resolve namespaces for you.
192 ///
193 /// # Examples
194 ///
195 /// ```
196 /// # use pretty_assertions::assert_eq;
197 /// use quick_xml::events::Event;
198 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
199 /// use quick_xml::reader::NsReader;
200 ///
201 /// let mut reader = NsReader::from_str(r#"
202 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
203 /// <y:tag2><!--Test comment-->Test</y:tag2>
204 /// <y:tag2>Test 2</y:tag2>
205 /// </x:tag1>
206 /// "#);
207 /// reader.config_mut().trim_text(true);
208 ///
209 /// let mut count = 0;
210 /// let mut buf = Vec::new();
211 /// let mut txt = Vec::new();
212 /// loop {
213 /// match reader.read_resolved_event_into(&mut buf).unwrap() {
214 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
215 /// count += 1;
216 /// assert_eq!(e.local_name(), QName(b"tag1").into());
217 /// }
218 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
219 /// count += 1;
220 /// assert_eq!(e.local_name(), QName(b"tag2").into());
221 /// }
222 /// (_, Event::Start(_)) => unreachable!(),
223 ///
224 /// (_, Event::Text(e)) => {
225 /// txt.push(e.decode().unwrap().into_owned())
226 /// }
227 /// (_, Event::Eof) => break,
228 /// _ => (),
229 /// }
230 /// buf.clear();
231 /// }
232 /// assert_eq!(count, 3);
233 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
234 /// ```
235 ///
236 /// [`Start`]: Event::Start
237 /// [`Empty`]: Event::Empty
238 /// [`End`]: Event::End
239 /// [`read_event_into()`]: Self::read_event_into
240 #[inline]
241 pub fn read_resolved_event_into<'b>(
242 &mut self,
243 buf: &'b mut Vec<u8>,
244 ) -> Result<(ResolveResult<'_>, Event<'b>)> {
245 let event = self.read_event_impl(buf)?;
246 Ok(self.ns_resolver.resolve_event(event))
247 }
248
249 /// Reads until end element is found using provided buffer as intermediate
250 /// storage for events content. This function is supposed to be called after
251 /// you already read a [`Start`] event.
252 ///
/// Returns a span that covers the content between `>` of an opening tag and `<` of
/// a closing tag, or an empty span if [`expand_empty_elements`] is set and
/// this method was called after reading an expanded [`Start`] event.
256 ///
257 /// Manages nested cases where parent and child elements have the _literally_
258 /// same name.
259 ///
/// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
/// will be returned. In particular, that error will be returned if you call
/// this method without consuming the corresponding [`Start`] event first.
263 ///
/// If your reader is created from a string slice or a byte array slice, it is
/// better to use the [`read_to_end()`] method, because it will not copy bytes
/// into an intermediate buffer.
267 ///
268 /// The provided `buf` buffer will be filled only by one event content at time.
269 /// Before reading of each event the buffer will be cleared. If you know an
270 /// appropriate size of each event, you can preallocate the buffer to reduce
271 /// number of reallocations.
272 ///
273 /// The `end` parameter should contain name of the end element _in the reader
274 /// encoding_. It is good practice to always get that parameter using
275 /// [`BytesStart::to_end()`] method.
276 ///
277 /// # Namespaces
278 ///
/// While the `NsReader` does namespace resolution, namespaces do not
/// change the algorithm for comparing names. Although the names `a:name`
/// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
/// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
/// according to [the specification]
284 ///
285 /// > The end of every element that begins with a **start-tag** MUST be marked
286 /// > by an **end-tag** containing a name that echoes the element's type as
287 /// > given in the **start-tag**
288 ///
289 /// # Examples
290 ///
291 /// This example shows, how you can skip XML content after you read the
292 /// start event.
293 ///
294 /// ```
295 /// # use pretty_assertions::assert_eq;
296 /// use quick_xml::events::{BytesStart, Event};
297 /// use quick_xml::name::{Namespace, ResolveResult};
298 /// use quick_xml::reader::NsReader;
299 ///
300 /// let mut reader = NsReader::from_str(r#"
301 /// <outer xmlns="namespace 1">
302 /// <inner xmlns="namespace 2">
303 /// <outer></outer>
304 /// </inner>
305 /// <inner>
306 /// <inner></inner>
307 /// <inner/>
308 /// <outer></outer>
309 /// <p:outer xmlns:p="ns"></p:outer>
310 /// <outer/>
311 /// </inner>
312 /// </outer>
313 /// "#);
314 /// reader.config_mut().trim_text(true);
315 /// let mut buf = Vec::new();
316 ///
317 /// let ns = Namespace(b"namespace 1");
318 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
319 /// let end = start.to_end().into_owned();
320 ///
321 /// // First, we read a start event...
322 /// assert_eq!(
323 /// reader.read_resolved_event_into(&mut buf).unwrap(),
324 /// (ResolveResult::Bound(ns), Event::Start(start))
325 /// );
326 ///
327 /// // ...then, we could skip all events to the corresponding end event.
328 /// // This call will correctly handle nested <outer> elements.
329 /// // Note, however, that this method does not handle namespaces.
330 /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
331 ///
332 /// // At the end we should get an Eof event, because we ate the whole XML
333 /// assert_eq!(
334 /// reader.read_resolved_event_into(&mut buf).unwrap(),
335 /// (ResolveResult::Unbound, Event::Eof)
336 /// );
337 /// ```
338 ///
339 /// [`Start`]: Event::Start
340 /// [`End`]: Event::End
341 /// [`IllFormed`]: crate::errors::Error::IllFormed
342 /// [`read_to_end()`]: Self::read_to_end
343 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
344 /// [`expand_empty_elements`]: Config::expand_empty_elements
345 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
346 #[inline]
347 pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
348 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
349 // match literally the start name. See `Config::check_end_names` documentation
350 let result = self.reader.read_to_end_into(end, buf)?;
351 // read_to_end_into will consume closing tag. Because nobody can access to its
352 // content anymore, we directly pop namespace of the opening tag
353 self.ns_resolver.pop();
354 Ok(result)
355 }
356
357 /// Reads content between start and end tags, including any markup using
358 /// provided buffer as intermediate storage for events content. This function
359 /// is supposed to be called after you already read a [`Start`] event.
360 ///
361 /// Manages nested cases where parent and child elements have the _literally_
362 /// same name.
363 ///
/// This method does not unescape the data it reads; instead it returns the content
/// of the XML document "as is". This is because it has no idea what text
/// it reads, and if, for example, it contains a CDATA section, an attempt to
/// unescape its content would corrupt the data.
368 ///
/// If your reader is created from a string slice or a byte array slice, it is
/// better to use the [`read_text()`] method, because it will not copy bytes
/// into an intermediate buffer.
372 ///
373 /// # Examples
374 ///
375 /// This example shows, how you can read a HTML content from your XML document.
376 ///
377 /// ```
378 /// # use pretty_assertions::assert_eq;
379 /// # use std::borrow::Cow;
380 /// use quick_xml::events::{BytesStart, Event};
381 /// use quick_xml::reader::NsReader;
382 ///
383 /// let mut reader = NsReader::from_reader("
384 /// <html>
385 /// <title>This is a HTML text</title>
386 /// <p>Usual XML rules does not apply inside it
387 /// <p>For example, elements not needed to be "closed"
388 /// </html>
389 /// ".as_bytes());
390 /// reader.config_mut().trim_text(true);
391 ///
392 /// let start = BytesStart::new("html");
393 /// let end = start.to_end().into_owned();
394 ///
395 /// let mut buf = Vec::new();
396 ///
397 /// // First, we read a start event...
398 /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start));
399 /// // ...and disable checking of end names because we expect HTML further...
400 /// reader.config_mut().check_end_names = false;
401 ///
402 /// // ...then, we could read text content until close tag.
403 /// // This call will correctly handle nested <html> elements.
404 /// let text = reader.read_text_into(end.name(), &mut buf).unwrap();
405 /// let text = text.decode().unwrap();
406 /// assert_eq!(text, r#"
407 /// <title>This is a HTML text</title>
408 /// <p>Usual XML rules does not apply inside it
409 /// <p>For example, elements not needed to be "closed"
410 /// "#);
411 /// assert!(matches!(text, Cow::Borrowed(_)));
412 ///
413 /// // Now we can enable checks again
414 /// reader.config_mut().check_end_names = true;
415 ///
416 /// // At the end we should get an Eof event, because we ate the whole XML
417 /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
418 /// ```
419 ///
420 /// [`Start`]: Event::Start
421 /// [`read_text()`]: Self::read_text()
422 #[inline]
423 pub fn read_text_into<'b>(
424 &mut self,
425 end: QName,
426 buf: &'b mut Vec<u8>,
427 ) -> Result<BytesText<'b>> {
428 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
429 // match literally the start name. See `Self::check_end_names` documentation
430 let result = self.reader.read_text_into(end, buf)?;
431 // read_text_into will consume closing tag. Because nobody can access to its
432 // content anymore, we directly pop namespace of the opening tag
433 self.ns_resolver.pop();
434 Ok(result)
435 }
436}
437
438impl NsReader<BufReader<File>> {
439 /// Creates an XML reader from a file path.
440 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
441 Ok(Self::new(Reader::from_file(path)?))
442 }
443}
444
445impl<'i> NsReader<&'i [u8]> {
446 /// Creates an XML reader from a string slice.
447 #[inline]
448 #[allow(clippy::should_implement_trait)]
449 pub fn from_str(s: &'i str) -> Self {
450 Self::new(Reader::from_str(s))
451 }
452
453 /// Reads the next event, borrow its content from the input buffer.
454 ///
455 /// This method manages namespaces but doesn't resolve them automatically.
456 /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
457 ///
458 /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
459 /// as soon as you get an event.
460 ///
461 /// There is no asynchronous `read_event_async()` version of this function,
462 /// because it is not necessary -- the contents are already in memory and no IO
463 /// is needed, therefore there is no potential for blocking.
464 ///
465 /// # Examples
466 ///
467 /// ```
468 /// # use pretty_assertions::assert_eq;
469 /// use quick_xml::events::Event;
470 /// use quick_xml::name::{Namespace, ResolveResult::*};
471 /// use quick_xml::reader::NsReader;
472 ///
473 /// let mut reader = NsReader::from_str(r#"
474 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
475 /// <y:tag2><!--Test comment-->Test</y:tag2>
476 /// <y:tag2>Test 2</y:tag2>
477 /// </x:tag1>
478 /// "#);
479 /// reader.config_mut().trim_text(true);
480 ///
481 /// let mut count = 0;
482 /// let mut txt = Vec::new();
483 /// loop {
484 /// match reader.read_event().unwrap() {
485 /// Event::Start(e) => {
486 /// count += 1;
487 /// let (ns, local) = reader.resolver().resolve_element(e.name());
488 /// match local.as_ref() {
489 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
490 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
491 /// _ => unreachable!(),
492 /// }
493 /// }
494 /// Event::Text(e) => {
495 /// txt.push(e.decode().unwrap().into_owned())
496 /// }
497 /// Event::Eof => break,
498 /// _ => (),
499 /// }
500 /// }
501 /// assert_eq!(count, 3);
502 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
503 /// ```
504 ///
505 /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
506 /// [`read_resolved_event()`]: Self::read_resolved_event
507 #[inline]
508 pub fn read_event(&mut self) -> Result<Event<'i>> {
509 self.read_event_impl(())
510 }
511
512 /// Reads the next event, borrow its content from the input buffer, and resolves
513 /// its namespace (if applicable).
514 ///
515 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
516 /// For all other events the concept of namespace is not defined, so
517 /// a [`ResolveResult::Unbound`] is returned.
518 ///
519 /// If you are not interested in namespaces, you can use [`read_event()`]
520 /// which will not automatically resolve namespaces for you.
521 ///
522 /// There is no asynchronous `read_resolved_event_async()` version of this function,
523 /// because it is not necessary -- the contents are already in memory and no IO
524 /// is needed, therefore there is no potential for blocking.
525 ///
526 /// # Examples
527 ///
528 /// ```
529 /// # use pretty_assertions::assert_eq;
530 /// use quick_xml::events::Event;
531 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
532 /// use quick_xml::reader::NsReader;
533 ///
534 /// let mut reader = NsReader::from_str(r#"
535 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
536 /// <y:tag2><!--Test comment-->Test</y:tag2>
537 /// <y:tag2>Test 2</y:tag2>
538 /// </x:tag1>
539 /// "#);
540 /// reader.config_mut().trim_text(true);
541 ///
542 /// let mut count = 0;
543 /// let mut txt = Vec::new();
544 /// loop {
545 /// match reader.read_resolved_event().unwrap() {
546 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
547 /// count += 1;
548 /// assert_eq!(e.local_name(), QName(b"tag1").into());
549 /// }
550 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
551 /// count += 1;
552 /// assert_eq!(e.local_name(), QName(b"tag2").into());
553 /// }
554 /// (_, Event::Start(_)) => unreachable!(),
555 ///
556 /// (_, Event::Text(e)) => {
557 /// txt.push(e.decode().unwrap().into_owned())
558 /// }
559 /// (_, Event::Eof) => break,
560 /// _ => (),
561 /// }
562 /// }
563 /// assert_eq!(count, 3);
564 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
565 /// ```
566 ///
567 /// [`Start`]: Event::Start
568 /// [`Empty`]: Event::Empty
569 /// [`End`]: Event::End
570 /// [`read_event()`]: Self::read_event
571 #[inline]
572 pub fn read_resolved_event(&mut self) -> Result<(ResolveResult<'_>, Event<'i>)> {
573 let event = self.read_event_impl(())?;
574 Ok(self.ns_resolver.resolve_event(event))
575 }
576
577 /// Reads until end element is found. This function is supposed to be called
578 /// after you already read a [`Start`] event.
579 ///
/// Returns a span that covers the content between `>` of an opening tag and `<` of
/// a closing tag, or an empty span if [`expand_empty_elements`] is set and
/// this method was called after reading an expanded [`Start`] event.
583 ///
584 /// Manages nested cases where parent and child elements have the _literally_
585 /// same name.
586 ///
/// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
/// will be returned. In particular, that error will be returned if you call
/// this method without consuming the corresponding [`Start`] event first.
590 ///
591 /// The `end` parameter should contain name of the end element _in the reader
592 /// encoding_. It is good practice to always get that parameter using
593 /// [`BytesStart::to_end()`] method.
594 ///
595 /// There is no asynchronous `read_to_end_async()` version of this function,
596 /// because it is not necessary -- the contents are already in memory and no IO
597 /// is needed, therefore there is no potential for blocking.
598 ///
599 /// # Namespaces
600 ///
/// While the `NsReader` does namespace resolution, namespaces do not
/// change the algorithm for comparing names. Although the names `a:name`
/// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
/// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
/// according to [the specification]
606 ///
607 /// > The end of every element that begins with a **start-tag** MUST be marked
608 /// > by an **end-tag** containing a name that echoes the element's type as
609 /// > given in the **start-tag**
610 ///
611 /// # Examples
612 ///
613 /// This example shows, how you can skip XML content after you read the
614 /// start event.
615 ///
616 /// ```
617 /// # use pretty_assertions::assert_eq;
618 /// use quick_xml::events::{BytesStart, Event};
619 /// use quick_xml::name::{Namespace, ResolveResult};
620 /// use quick_xml::reader::NsReader;
621 ///
622 /// let mut reader = NsReader::from_str(r#"
623 /// <outer xmlns="namespace 1">
624 /// <inner xmlns="namespace 2">
625 /// <outer></outer>
626 /// </inner>
627 /// <inner>
628 /// <inner></inner>
629 /// <inner/>
630 /// <outer></outer>
631 /// <p:outer xmlns:p="ns"></p:outer>
632 /// <outer/>
633 /// </inner>
634 /// </outer>
635 /// "#);
636 /// reader.config_mut().trim_text(true);
637 ///
638 /// let ns = Namespace(b"namespace 1");
639 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
640 /// let end = start.to_end().into_owned();
641 ///
642 /// // First, we read a start event...
643 /// assert_eq!(
644 /// reader.read_resolved_event().unwrap(),
645 /// (ResolveResult::Bound(ns), Event::Start(start))
646 /// );
647 ///
648 /// // ...then, we could skip all events to the corresponding end event.
649 /// // This call will correctly handle nested <outer> elements.
650 /// // Note, however, that this method does not handle namespaces.
651 /// reader.read_to_end(end.name()).unwrap();
652 ///
653 /// // At the end we should get an Eof event, because we ate the whole XML
654 /// assert_eq!(
655 /// reader.read_resolved_event().unwrap(),
656 /// (ResolveResult::Unbound, Event::Eof)
657 /// );
658 /// ```
659 ///
660 /// [`Start`]: Event::Start
661 /// [`End`]: Event::End
662 /// [`IllFormed`]: crate::errors::Error::IllFormed
663 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
664 /// [`expand_empty_elements`]: Config::expand_empty_elements
665 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
666 #[inline]
667 pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
668 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
669 // match literally the start name. See `Config::check_end_names` documentation
670 let result = self.reader.read_to_end(end)?;
671 // read_to_end will consume closing tag. Because nobody can access to its
672 // content anymore, we directly pop namespace of the opening tag
673 self.ns_resolver.pop();
674 Ok(result)
675 }
676
677 /// Reads content between start and end tags, including any markup. This
678 /// function is supposed to be called after you already read a [`Start`] event.
679 ///
680 /// Manages nested cases where parent and child elements have the _literally_
681 /// same name.
682 ///
/// This method does not unescape the data it reads; instead it returns the content
/// of the XML document "as is". This is because it has no idea what text
/// it reads, and if, for example, it contains a CDATA section, an attempt to
/// unescape its content would corrupt the data.
687 ///
/// Any text will be decoded using the current XML [`decoder()`].
///
/// In effect, this method performs the following code:
691 ///
692 /// ```ignore
693 /// let span = reader.read_to_end(end)?;
694 /// let text = reader.decoder().decode(&reader.inner_slice[span]);
695 /// ```
696 ///
697 /// # Examples
698 ///
699 /// This example shows, how you can read a HTML content from your XML document.
700 ///
701 /// ```
702 /// # use pretty_assertions::assert_eq;
703 /// # use std::borrow::Cow;
704 /// use quick_xml::events::{BytesStart, Event};
705 /// use quick_xml::reader::NsReader;
706 ///
707 /// let mut reader = NsReader::from_str(r#"
708 /// <html>
709 /// <title>This is a HTML text</title>
710 /// <p>Usual XML rules does not apply inside it
711 /// <p>For example, elements not needed to be "closed"
712 /// </html>
713 /// "#);
714 /// reader.config_mut().trim_text(true);
715 ///
716 /// let start = BytesStart::new("html");
717 /// let end = start.to_end().into_owned();
718 ///
719 /// // First, we read a start event...
720 /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
721 /// // ...and disable checking of end names because we expect HTML further...
722 /// reader.config_mut().check_end_names = false;
723 ///
724 /// // ...then, we could read text content until close tag.
725 /// // This call will correctly handle nested <html> elements.
726 /// let text = reader.read_text(end.name()).unwrap();
727 /// let text = text.decode().unwrap();
728 /// assert_eq!(text, r#"
729 /// <title>This is a HTML text</title>
730 /// <p>Usual XML rules does not apply inside it
731 /// <p>For example, elements not needed to be "closed"
732 /// "#);
733 /// assert!(matches!(text, Cow::Borrowed(_)));
734 ///
735 /// // Now we can enable checks again
736 /// reader.config_mut().check_end_names = true;
737 ///
738 /// // At the end we should get an Eof event, because we ate the whole XML
739 /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
740 /// ```
741 ///
742 /// [`Start`]: Event::Start
743 /// [`decoder()`]: Reader::decoder()
744 #[inline]
745 pub fn read_text(&mut self, end: QName) -> Result<BytesText<'i>> {
746 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
747 // match literally the start name. See `Self::check_end_names` documentation
748 let result = self.reader.read_text(end)?;
749 // read_text will consume closing tag. Because nobody can access to its
750 // content anymore, we directly pop namespace of the opening tag
751 self.ns_resolver.pop();
752 Ok(result)
753 }
754}
755
756impl<R> Deref for NsReader<R> {
757 type Target = Reader<R>;
758
759 #[inline]
760 fn deref(&self) -> &Self::Target {
761 &self.reader
762 }
763}