quick_xml/reader/ns_reader.rs
//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
3//!
4//! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
5//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
6
7use std::borrow::Cow;
8use std::fs::File;
9use std::io::{BufRead, BufReader};
10use std::ops::Deref;
11use std::path::Path;
12
13use crate::errors::Result;
14use crate::events::{BytesText, Event};
15use crate::name::{LocalName, NamespaceBindingsIter, NamespaceResolver, QName, ResolveResult};
16use crate::reader::{Config, Reader, Span, XmlSource};
17
18/// A low level encoding-agnostic XML event reader that performs namespace resolution.
19///
20/// Consumes a [`BufRead`] and streams XML `Event`s.
21#[derive(Debug, Clone)]
22pub struct NsReader<R> {
23 /// An XML reader
24 pub(super) reader: Reader<R>,
25 /// A buffer to manage namespaces
26 pub(super) ns_resolver: NamespaceResolver,
27 /// We cannot pop data from the namespace stack until returned `Empty` or `End`
28 /// event will be processed by the user, so we only mark that we should that
29 /// in the next [`Self::read_event_impl()`] call.
30 pending_pop: bool,
31}
32
33/// Builder methods
34impl<R> NsReader<R> {
35 /// Creates a `NsReader` that reads from a reader.
36 #[inline]
37 pub fn from_reader(reader: R) -> Self {
38 Self::new(Reader::from_reader(reader))
39 }
40
41 /// Returns reference to the parser configuration
42 #[inline]
43 pub const fn config(&self) -> &Config {
44 self.reader.config()
45 }
46
47 /// Returns mutable reference to the parser configuration
48 #[inline]
49 pub fn config_mut(&mut self) -> &mut Config {
50 self.reader.config_mut()
51 }
52
53 /// Returns all the prefixes currently declared except the default `xml` and `xmlns` namespaces.
54 ///
55 /// # Examples
56 ///
57 /// This example shows what results the returned iterator would return after
58 /// reading each event of a simple XML.
59 ///
60 /// ```
61 /// # use pretty_assertions::assert_eq;
62 /// use quick_xml::name::{Namespace, PrefixDeclaration};
63 /// use quick_xml::NsReader;
64 ///
65 /// let src = "<root>
66 /// <a xmlns=\"a1\" xmlns:a=\"a2\">
67 /// <b xmlns=\"b1\" xmlns:b=\"b2\">
68 /// <c/>
69 /// </b>
70 /// <d/>
71 /// </a>
72 /// </root>";
73 /// let mut reader = NsReader::from_str(src);
74 /// reader.config_mut().trim_text(true);
75 /// // No prefixes at the beginning
76 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
77 ///
78 /// reader.read_resolved_event()?; // <root>
79 /// // No prefixes declared on root
80 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
81 ///
82 /// reader.read_resolved_event()?; // <a>
83 /// // Two prefixes declared on "a"
84 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
85 /// (PrefixDeclaration::Default, Namespace(b"a1")),
86 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
87 /// ]);
88 ///
89 /// reader.read_resolved_event()?; // <b>
90 /// // The default prefix got overridden and new "b" prefix
91 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
92 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
93 /// (PrefixDeclaration::Default, Namespace(b"b1")),
94 /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
95 /// ]);
96 ///
97 /// reader.read_resolved_event()?; // <c/>
98 /// // Still the same
99 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
100 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
101 /// (PrefixDeclaration::Default, Namespace(b"b1")),
102 /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
103 /// ]);
104 ///
105 /// reader.read_resolved_event()?; // </b>
106 /// // Still the same
107 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
108 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
109 /// (PrefixDeclaration::Default, Namespace(b"b1")),
110 /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
111 /// ]);
112 ///
113 /// reader.read_resolved_event()?; // <d/>
114 /// // </b> got closed so back to the prefixes declared on <a>
115 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
116 /// (PrefixDeclaration::Default, Namespace(b"a1")),
117 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
118 /// ]);
119 ///
120 /// reader.read_resolved_event()?; // </a>
121 /// // Still the same
122 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
123 /// (PrefixDeclaration::Default, Namespace(b"a1")),
124 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
125 /// ]);
126 ///
127 /// reader.read_resolved_event()?; // </root>
128 /// // <a> got closed
129 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
130 /// # quick_xml::Result::Ok(())
131 /// ```
132 #[inline]
133 #[deprecated = "Use `.resolver().bindings()` instead. This method will be removed in 0.40.0"]
134 pub const fn prefixes(&self) -> NamespaceBindingsIter<'_> {
135 self.ns_resolver.bindings()
136 }
137}
138
139/// Private methods
140impl<R> NsReader<R> {
141 #[inline]
142 fn new(reader: Reader<R>) -> Self {
143 Self {
144 reader,
145 ns_resolver: NamespaceResolver::default(),
146 pending_pop: false,
147 }
148 }
149
150 fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
151 where
152 R: XmlSource<'i, B>,
153 {
154 self.pop();
155 let event = self.reader.read_event_impl(buf);
156 self.process_event(event)
157 }
158
159 pub(super) fn pop(&mut self) {
160 if self.pending_pop {
161 self.ns_resolver.pop();
162 self.pending_pop = false;
163 }
164 }
165
166 pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
167 match event {
168 Ok(Event::Start(e)) => {
169 self.ns_resolver.push(&e)?;
170 Ok(Event::Start(e))
171 }
172 Ok(Event::Empty(e)) => {
173 self.ns_resolver.push(&e)?;
174 // notify next `read_event_impl()` invocation that it needs to pop this
175 // namespace scope
176 self.pending_pop = true;
177 Ok(Event::Empty(e))
178 }
179 Ok(Event::End(e)) => {
180 // notify next `read_event_impl()` invocation that it needs to pop this
181 // namespace scope
182 self.pending_pop = true;
183 Ok(Event::End(e))
184 }
185 e => e,
186 }
187 }
188}
189
190/// Getters
191impl<R> NsReader<R> {
192 /// Consumes `NsReader` returning the underlying reader
193 ///
194 /// See the [`Reader::into_inner`] for examples
195 #[inline]
196 pub fn into_inner(self) -> R {
197 self.reader.into_inner()
198 }
199
200 /// Gets a mutable reference to the underlying reader.
201 pub fn get_mut(&mut self) -> &mut R {
202 self.reader.get_mut()
203 }
204
205 /// Returns a storage of namespace bindings associated with this reader.
206 #[inline]
207 pub const fn resolver(&self) -> &NamespaceResolver {
208 &self.ns_resolver
209 }
210
211 /// Resolves a potentially qualified **element name** or **attribute name**
212 /// into _(namespace name, local name)_.
213 ///
214 /// _Qualified_ names have the form `local-name` or `prefix:local-name` where the `prefix`
215 /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
216 /// The namespace prefix can be defined on the same element as the name in question.
217 ///
218 /// The method returns following results depending on the `name` shape, `attribute` flag
219 /// and the presence of the default namespace on element or any of its parents:
220 ///
221 /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName
222 /// |---------|-------------|-------------------|-----------------------|------------
223 /// |`true` |_(any)_ |`local-name` |[`Unbound`] |`local-name`
224 /// |`true` |_(any)_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
225 /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name`
226 /// |`false` |Defined |`local-name` |[`Bound`] (to `xmlns`) |`local-name`
227 /// |`false` |_(any)_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
228 ///
229 /// If you want to clearly indicate that name that you resolve is an element
230 /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`]
231 /// methods.
232 ///
233 /// # Lifetimes
234 ///
235 /// - `'n`: lifetime of a name. Returned local name will be bound to the same
236 /// lifetime as the name in question.
237 /// - returned namespace name will be bound to the reader itself
238 ///
239 /// [`Bound`]: ResolveResult::Bound
240 /// [`Unbound`]: ResolveResult::Unbound
241 /// [`Unknown`]: ResolveResult::Unknown
242 /// [`resolve_attribute()`]: Self::resolve_attribute()
243 /// [`resolve_element()`]: Self::resolve_element()
244 #[inline]
245 #[deprecated = "Use `.resolver().resolve()` instead. Note, that boolean argument should be inverted! This method will be removed in 0.40.0"]
246 pub fn resolve<'n>(
247 &self,
248 name: QName<'n>,
249 attribute: bool,
250 ) -> (ResolveResult<'_>, LocalName<'n>) {
251 self.ns_resolver.resolve(name, !attribute)
252 }
253
254 /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
255 ///
256 /// _Qualified_ element names have the form `prefix:local-name` where the
257 /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
258 /// The namespace prefix can be defined on the same element as the element
259 /// in question.
260 ///
    /// _Unqualified_ elements inherit the current _default namespace_.
262 ///
263 /// The method returns following results depending on the `name` shape and
264 /// the presence of the default namespace:
265 ///
266 /// |`xmlns="..."`|QName |ResolveResult |LocalName
267 /// |-------------|-------------------|-----------------------|------------
268 /// |Not defined |`local-name` |[`Unbound`] |`local-name`
269 /// |Defined |`local-name` |[`Bound`] (default) |`local-name`
270 /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
271 ///
272 /// # Lifetimes
273 ///
274 /// - `'n`: lifetime of an element name. Returned local name will be bound
275 /// to the same lifetime as the name in question.
276 /// - returned namespace name will be bound to the reader itself
277 ///
278 /// # Examples
279 ///
    /// This example shows how you can resolve a qualified name into a namespace.
    /// Note that in code like this you do not need to do that manually,
    /// because the namespace resolution result is returned by [`read_resolved_event()`].
283 ///
284 /// ```
285 /// # use pretty_assertions::assert_eq;
286 /// use quick_xml::events::Event;
287 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
288 /// use quick_xml::reader::NsReader;
289 ///
290 /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>");
291 ///
292 /// match reader.read_event().unwrap() {
293 /// Event::Empty(e) => assert_eq!(
294 /// reader.resolve_element(e.name()),
295 /// (Bound(Namespace(b"root namespace")), QName(b"tag").into())
296 /// ),
297 /// _ => unreachable!(),
298 /// }
299 /// ```
300 ///
301 /// [`Bound`]: ResolveResult::Bound
302 /// [`Unbound`]: ResolveResult::Unbound
303 /// [`Unknown`]: ResolveResult::Unknown
304 /// [`read_resolved_event()`]: Self::read_resolved_event
305 #[inline]
306 #[deprecated = "Use `.resolver().resolve_element()` instead. This method will be removed in 0.40.0"]
307 pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) {
308 self.ns_resolver.resolve_element(name)
309 }
310
311 /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
312 ///
313 /// _Qualified_ attribute names have the form `prefix:local-name` where the
314 /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
315 /// The namespace prefix can be defined on the same element as the attribute
316 /// in question.
317 ///
318 /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
319 ///
320 /// The method returns following results depending on the `name` shape and
321 /// the presence of the default namespace:
322 ///
323 /// |`xmlns="..."`|QName |ResolveResult |LocalName
324 /// |-------------|-------------------|-----------------------|------------
325 /// |Not defined |`local-name` |[`Unbound`] |`local-name`
326 /// |Defined |`local-name` |[`Unbound`] |`local-name`
327 /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
328 ///
329 /// # Lifetimes
330 ///
331 /// - `'n`: lifetime of an attribute name. Returned local name will be bound
332 /// to the same lifetime as the name in question.
333 /// - returned namespace name will be bound to the reader itself
334 ///
335 /// # Examples
336 ///
337 /// ```
338 /// # use pretty_assertions::assert_eq;
339 /// use quick_xml::events::Event;
340 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
341 /// use quick_xml::reader::NsReader;
342 ///
343 /// let mut reader = NsReader::from_str("
344 /// <tag one='1'
345 /// p:two='2'
346 /// xmlns='root namespace'
347 /// xmlns:p='other namespace'/>
348 /// ");
349 /// reader.config_mut().trim_text(true);
350 ///
351 /// match reader.read_event().unwrap() {
352 /// Event::Empty(e) => {
353 /// let mut iter = e.attributes();
354 ///
355 /// // Unlike elements, attributes without explicit namespace
356 /// // not bound to any namespace
357 /// let one = iter.next().unwrap().unwrap();
358 /// assert_eq!(
359 /// reader.resolve_attribute(one.key),
360 /// (Unbound, QName(b"one").into())
361 /// );
362 ///
363 /// let two = iter.next().unwrap().unwrap();
364 /// assert_eq!(
365 /// reader.resolve_attribute(two.key),
366 /// (Bound(Namespace(b"other namespace")), QName(b"two").into())
367 /// );
368 /// }
369 /// _ => unreachable!(),
370 /// }
371 /// ```
372 ///
373 /// [`Bound`]: ResolveResult::Bound
374 /// [`Unbound`]: ResolveResult::Unbound
375 /// [`Unknown`]: ResolveResult::Unknown
376 #[inline]
377 #[deprecated = "Use `.resolver().resolve_attribute()` instead. This method will be removed in 0.40.0"]
378 pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) {
379 self.ns_resolver.resolve_attribute(name)
380 }
381}
382
383impl<R: BufRead> NsReader<R> {
384 /// Reads the next event into given buffer.
385 ///
386 /// This method manages namespaces but doesn't resolve them automatically.
387 /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
388 ///
389 /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
390 /// namespace as soon as you get an event.
391 ///
392 /// # Examples
393 ///
394 /// ```
395 /// # use pretty_assertions::assert_eq;
396 /// use quick_xml::events::Event;
397 /// use quick_xml::name::{Namespace, ResolveResult::*};
398 /// use quick_xml::reader::NsReader;
399 ///
400 /// let mut reader = NsReader::from_str(r#"
401 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
402 /// <y:tag2><!--Test comment-->Test</y:tag2>
403 /// <y:tag2>Test 2</y:tag2>
404 /// </x:tag1>
405 /// "#);
406 /// reader.config_mut().trim_text(true);
407 ///
408 /// let mut count = 0;
409 /// let mut buf = Vec::new();
410 /// let mut txt = Vec::new();
411 /// loop {
412 /// match reader.read_event_into(&mut buf).unwrap() {
413 /// Event::Start(e) => {
414 /// count += 1;
415 /// let (ns, local) = reader.resolver().resolve_element(e.name());
416 /// match local.as_ref() {
417 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
418 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
419 /// _ => unreachable!(),
420 /// }
421 /// }
422 /// Event::Text(e) => {
423 /// txt.push(e.decode().unwrap().into_owned())
424 /// }
425 /// Event::Eof => break,
426 /// _ => (),
427 /// }
428 /// buf.clear();
429 /// }
430 /// assert_eq!(count, 3);
431 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
432 /// ```
433 ///
434 /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
435 /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
436 #[inline]
437 pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
438 self.read_event_impl(buf)
439 }
440
441 /// Reads the next event into given buffer and resolves its namespace (if applicable).
442 ///
443 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
444 /// For all other events the concept of namespace is not defined, so
445 /// a [`ResolveResult::Unbound`] is returned.
446 ///
447 /// If you are not interested in namespaces, you can use [`read_event_into()`]
448 /// which will not automatically resolve namespaces for you.
449 ///
450 /// # Examples
451 ///
452 /// ```
453 /// # use pretty_assertions::assert_eq;
454 /// use quick_xml::events::Event;
455 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
456 /// use quick_xml::reader::NsReader;
457 ///
458 /// let mut reader = NsReader::from_str(r#"
459 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
460 /// <y:tag2><!--Test comment-->Test</y:tag2>
461 /// <y:tag2>Test 2</y:tag2>
462 /// </x:tag1>
463 /// "#);
464 /// reader.config_mut().trim_text(true);
465 ///
466 /// let mut count = 0;
467 /// let mut buf = Vec::new();
468 /// let mut txt = Vec::new();
469 /// loop {
470 /// match reader.read_resolved_event_into(&mut buf).unwrap() {
471 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
472 /// count += 1;
473 /// assert_eq!(e.local_name(), QName(b"tag1").into());
474 /// }
475 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
476 /// count += 1;
477 /// assert_eq!(e.local_name(), QName(b"tag2").into());
478 /// }
479 /// (_, Event::Start(_)) => unreachable!(),
480 ///
481 /// (_, Event::Text(e)) => {
482 /// txt.push(e.decode().unwrap().into_owned())
483 /// }
484 /// (_, Event::Eof) => break,
485 /// _ => (),
486 /// }
487 /// buf.clear();
488 /// }
489 /// assert_eq!(count, 3);
490 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
491 /// ```
492 ///
493 /// [`Start`]: Event::Start
494 /// [`Empty`]: Event::Empty
495 /// [`End`]: Event::End
496 /// [`read_event_into()`]: Self::read_event_into
497 #[inline]
498 pub fn read_resolved_event_into<'b>(
499 &mut self,
500 buf: &'b mut Vec<u8>,
501 ) -> Result<(ResolveResult<'_>, Event<'b>)> {
502 let event = self.read_event_impl(buf)?;
503 Ok(self.ns_resolver.resolve_event(event))
504 }
505
506 /// Reads until end element is found using provided buffer as intermediate
507 /// storage for events content. This function is supposed to be called after
508 /// you already read a [`Start`] event.
509 ///
510 /// Returns a span that cover content between `>` of an opening tag and `<` of
511 /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
512 /// this method was called after reading expanded [`Start`] event.
513 ///
514 /// Manages nested cases where parent and child elements have the _literally_
515 /// same name.
516 ///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
    ///
    /// If your reader is created from a string slice or byte array slice, it is
    /// better to use the [`read_to_end()`] method, because it will not copy bytes
    /// into an intermediate buffer.
524 ///
525 /// The provided `buf` buffer will be filled only by one event content at time.
526 /// Before reading of each event the buffer will be cleared. If you know an
527 /// appropriate size of each event, you can preallocate the buffer to reduce
528 /// number of reallocations.
529 ///
530 /// The `end` parameter should contain name of the end element _in the reader
531 /// encoding_. It is good practice to always get that parameter using
532 /// [`BytesStart::to_end()`] method.
533 ///
534 /// # Namespaces
535 ///
536 /// While the `NsReader` does namespace resolution, namespaces does not
537 /// change the algorithm for comparing names. Although the names `a:name`
538 /// and `b:name` where both prefixes `a` and `b` resolves to the same namespace,
539 /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
540 /// according to [the specification]
541 ///
542 /// > The end of every element that begins with a **start-tag** MUST be marked
543 /// > by an **end-tag** containing a name that echoes the element's type as
544 /// > given in the **start-tag**
545 ///
546 /// # Examples
547 ///
548 /// This example shows, how you can skip XML content after you read the
549 /// start event.
550 ///
551 /// ```
552 /// # use pretty_assertions::assert_eq;
553 /// use quick_xml::events::{BytesStart, Event};
554 /// use quick_xml::name::{Namespace, ResolveResult};
555 /// use quick_xml::reader::NsReader;
556 ///
557 /// let mut reader = NsReader::from_str(r#"
558 /// <outer xmlns="namespace 1">
559 /// <inner xmlns="namespace 2">
560 /// <outer></outer>
561 /// </inner>
562 /// <inner>
563 /// <inner></inner>
564 /// <inner/>
565 /// <outer></outer>
566 /// <p:outer xmlns:p="ns"></p:outer>
567 /// <outer/>
568 /// </inner>
569 /// </outer>
570 /// "#);
571 /// reader.config_mut().trim_text(true);
572 /// let mut buf = Vec::new();
573 ///
574 /// let ns = Namespace(b"namespace 1");
575 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
576 /// let end = start.to_end().into_owned();
577 ///
578 /// // First, we read a start event...
579 /// assert_eq!(
580 /// reader.read_resolved_event_into(&mut buf).unwrap(),
581 /// (ResolveResult::Bound(ns), Event::Start(start))
582 /// );
583 ///
584 /// // ...then, we could skip all events to the corresponding end event.
585 /// // This call will correctly handle nested <outer> elements.
586 /// // Note, however, that this method does not handle namespaces.
587 /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
588 ///
589 /// // At the end we should get an Eof event, because we ate the whole XML
590 /// assert_eq!(
591 /// reader.read_resolved_event_into(&mut buf).unwrap(),
592 /// (ResolveResult::Unbound, Event::Eof)
593 /// );
594 /// ```
595 ///
596 /// [`Start`]: Event::Start
597 /// [`End`]: Event::End
598 /// [`IllFormed`]: crate::errors::Error::IllFormed
599 /// [`read_to_end()`]: Self::read_to_end
600 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
601 /// [`expand_empty_elements`]: Config::expand_empty_elements
602 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
603 #[inline]
604 pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
605 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
606 // match literally the start name. See `Config::check_end_names` documentation
607 let result = self.reader.read_to_end_into(end, buf)?;
608 // read_to_end_into will consume closing tag. Because nobody can access to its
609 // content anymore, we directly pop namespace of the opening tag
610 self.ns_resolver.pop();
611 Ok(result)
612 }
613
614 /// Reads content between start and end tags, including any markup using
615 /// provided buffer as intermediate storage for events content. This function
616 /// is supposed to be called after you already read a [`Start`] event.
617 ///
618 /// Manages nested cases where parent and child elements have the _literally_
619 /// same name.
620 ///
    /// This method does not unescape the data it reads; instead it returns the
    /// content of the XML document "as is". This is because it has no idea what
    /// text it reads, and if, for example, it contains a CDATA section, an attempt
    /// to unescape its content would corrupt the data.
    ///
    /// If your reader is created from a string slice or byte array slice, it is
    /// better to use the [`read_text()`] method, because it will not copy bytes
    /// into an intermediate buffer.
629 ///
630 /// # Examples
631 ///
632 /// This example shows, how you can read a HTML content from your XML document.
633 ///
634 /// ```
635 /// # use pretty_assertions::assert_eq;
636 /// # use std::borrow::Cow;
637 /// use quick_xml::events::{BytesStart, Event};
638 /// use quick_xml::reader::NsReader;
639 ///
640 /// let mut reader = NsReader::from_reader("
641 /// <html>
642 /// <title>This is a HTML text</title>
643 /// <p>Usual XML rules does not apply inside it
644 /// <p>For example, elements not needed to be "closed"
645 /// </html>
646 /// ".as_bytes());
647 /// reader.config_mut().trim_text(true);
648 ///
649 /// let start = BytesStart::new("html");
650 /// let end = start.to_end().into_owned();
651 ///
652 /// let mut buf = Vec::new();
653 ///
654 /// // First, we read a start event...
655 /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start));
656 /// // ...and disable checking of end names because we expect HTML further...
657 /// reader.config_mut().check_end_names = false;
658 ///
659 /// // ...then, we could read text content until close tag.
660 /// // This call will correctly handle nested <html> elements.
661 /// let text = reader.read_text_into(end.name(), &mut buf).unwrap();
662 /// let text = text.decode().unwrap();
663 /// assert_eq!(text, r#"
664 /// <title>This is a HTML text</title>
665 /// <p>Usual XML rules does not apply inside it
666 /// <p>For example, elements not needed to be "closed"
667 /// "#);
668 /// assert!(matches!(text, Cow::Borrowed(_)));
669 ///
670 /// // Now we can enable checks again
671 /// reader.config_mut().check_end_names = true;
672 ///
673 /// // At the end we should get an Eof event, because we ate the whole XML
674 /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
675 /// ```
676 ///
677 /// [`Start`]: Event::Start
678 /// [`read_text()`]: Self::read_text()
679 #[inline]
680 pub fn read_text_into<'b>(
681 &mut self,
682 end: QName,
683 buf: &'b mut Vec<u8>,
684 ) -> Result<BytesText<'b>> {
685 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
686 // match literally the start name. See `Self::check_end_names` documentation
687 let result = self.reader.read_text_into(end, buf)?;
688 // read_text_into will consume closing tag. Because nobody can access to its
689 // content anymore, we directly pop namespace of the opening tag
690 self.ns_resolver.pop();
691 Ok(result)
692 }
693}
694
695impl NsReader<BufReader<File>> {
696 /// Creates an XML reader from a file path.
697 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
698 Ok(Self::new(Reader::from_file(path)?))
699 }
700}
701
702impl<'i> NsReader<&'i [u8]> {
703 /// Creates an XML reader from a string slice.
704 #[inline]
705 #[allow(clippy::should_implement_trait)]
706 pub fn from_str(s: &'i str) -> Self {
707 Self::new(Reader::from_str(s))
708 }
709
710 /// Reads the next event, borrow its content from the input buffer.
711 ///
712 /// This method manages namespaces but doesn't resolve them automatically.
713 /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
714 ///
715 /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
716 /// as soon as you get an event.
717 ///
718 /// There is no asynchronous `read_event_async()` version of this function,
719 /// because it is not necessary -- the contents are already in memory and no IO
720 /// is needed, therefore there is no potential for blocking.
721 ///
722 /// # Examples
723 ///
724 /// ```
725 /// # use pretty_assertions::assert_eq;
726 /// use quick_xml::events::Event;
727 /// use quick_xml::name::{Namespace, ResolveResult::*};
728 /// use quick_xml::reader::NsReader;
729 ///
730 /// let mut reader = NsReader::from_str(r#"
731 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
732 /// <y:tag2><!--Test comment-->Test</y:tag2>
733 /// <y:tag2>Test 2</y:tag2>
734 /// </x:tag1>
735 /// "#);
736 /// reader.config_mut().trim_text(true);
737 ///
738 /// let mut count = 0;
739 /// let mut txt = Vec::new();
740 /// loop {
741 /// match reader.read_event().unwrap() {
742 /// Event::Start(e) => {
743 /// count += 1;
744 /// let (ns, local) = reader.resolver().resolve_element(e.name());
745 /// match local.as_ref() {
746 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
747 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
748 /// _ => unreachable!(),
749 /// }
750 /// }
751 /// Event::Text(e) => {
752 /// txt.push(e.decode().unwrap().into_owned())
753 /// }
754 /// Event::Eof => break,
755 /// _ => (),
756 /// }
757 /// }
758 /// assert_eq!(count, 3);
759 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
760 /// ```
761 ///
762 /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
763 /// [`read_resolved_event()`]: Self::read_resolved_event
764 #[inline]
765 pub fn read_event(&mut self) -> Result<Event<'i>> {
766 self.read_event_impl(())
767 }
768
769 /// Reads the next event, borrow its content from the input buffer, and resolves
770 /// its namespace (if applicable).
771 ///
772 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
773 /// For all other events the concept of namespace is not defined, so
774 /// a [`ResolveResult::Unbound`] is returned.
775 ///
776 /// If you are not interested in namespaces, you can use [`read_event()`]
777 /// which will not automatically resolve namespaces for you.
778 ///
779 /// There is no asynchronous `read_resolved_event_async()` version of this function,
780 /// because it is not necessary -- the contents are already in memory and no IO
781 /// is needed, therefore there is no potential for blocking.
782 ///
783 /// # Examples
784 ///
785 /// ```
786 /// # use pretty_assertions::assert_eq;
787 /// use quick_xml::events::Event;
788 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
789 /// use quick_xml::reader::NsReader;
790 ///
791 /// let mut reader = NsReader::from_str(r#"
792 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
793 /// <y:tag2><!--Test comment-->Test</y:tag2>
794 /// <y:tag2>Test 2</y:tag2>
795 /// </x:tag1>
796 /// "#);
797 /// reader.config_mut().trim_text(true);
798 ///
799 /// let mut count = 0;
800 /// let mut txt = Vec::new();
801 /// loop {
802 /// match reader.read_resolved_event().unwrap() {
803 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
804 /// count += 1;
805 /// assert_eq!(e.local_name(), QName(b"tag1").into());
806 /// }
807 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
808 /// count += 1;
809 /// assert_eq!(e.local_name(), QName(b"tag2").into());
810 /// }
811 /// (_, Event::Start(_)) => unreachable!(),
812 ///
813 /// (_, Event::Text(e)) => {
814 /// txt.push(e.decode().unwrap().into_owned())
815 /// }
816 /// (_, Event::Eof) => break,
817 /// _ => (),
818 /// }
819 /// }
820 /// assert_eq!(count, 3);
821 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
822 /// ```
823 ///
824 /// [`Start`]: Event::Start
825 /// [`Empty`]: Event::Empty
826 /// [`End`]: Event::End
827 /// [`read_event()`]: Self::read_event
828 #[inline]
829 pub fn read_resolved_event(&mut self) -> Result<(ResolveResult<'_>, Event<'i>)> {
830 let event = self.read_event_impl(())?;
831 Ok(self.ns_resolver.resolve_event(event))
832 }
833
834 /// Reads until end element is found. This function is supposed to be called
835 /// after you already read a [`Start`] event.
836 ///
837 /// Returns a span that cover content between `>` of an opening tag and `<` of
838 /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
839 /// this method was called after reading expanded [`Start`] event.
840 ///
841 /// Manages nested cases where parent and child elements have the _literally_
842 /// same name.
843 ///
844 /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
845 /// will be returned. In particularly, that error will be returned if you call
846 /// this method without consuming the corresponding [`Start`] event first.
847 ///
848 /// The `end` parameter should contain name of the end element _in the reader
849 /// encoding_. It is good practice to always get that parameter using
850 /// [`BytesStart::to_end()`] method.
851 ///
852 /// There is no asynchronous `read_to_end_async()` version of this function,
853 /// because it is not necessary -- the contents are already in memory and no IO
854 /// is needed, therefore there is no potential for blocking.
855 ///
856 /// # Namespaces
857 ///
858 /// While the `NsReader` does namespace resolution, namespaces does not
859 /// change the algorithm for comparing names. Although the names `a:name`
860 /// and `b:name` where both prefixes `a` and `b` resolves to the same namespace,
861 /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
862 /// according to [the specification]
863 ///
864 /// > The end of every element that begins with a **start-tag** MUST be marked
865 /// > by an **end-tag** containing a name that echoes the element's type as
866 /// > given in the **start-tag**
867 ///
868 /// # Examples
869 ///
870 /// This example shows, how you can skip XML content after you read the
871 /// start event.
872 ///
873 /// ```
874 /// # use pretty_assertions::assert_eq;
875 /// use quick_xml::events::{BytesStart, Event};
876 /// use quick_xml::name::{Namespace, ResolveResult};
877 /// use quick_xml::reader::NsReader;
878 ///
879 /// let mut reader = NsReader::from_str(r#"
880 /// <outer xmlns="namespace 1">
881 /// <inner xmlns="namespace 2">
882 /// <outer></outer>
883 /// </inner>
884 /// <inner>
885 /// <inner></inner>
886 /// <inner/>
887 /// <outer></outer>
888 /// <p:outer xmlns:p="ns"></p:outer>
889 /// <outer/>
890 /// </inner>
891 /// </outer>
892 /// "#);
893 /// reader.config_mut().trim_text(true);
894 ///
895 /// let ns = Namespace(b"namespace 1");
896 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
897 /// let end = start.to_end().into_owned();
898 ///
899 /// // First, we read a start event...
900 /// assert_eq!(
901 /// reader.read_resolved_event().unwrap(),
902 /// (ResolveResult::Bound(ns), Event::Start(start))
903 /// );
904 ///
905 /// // ...then, we could skip all events to the corresponding end event.
906 /// // This call will correctly handle nested <outer> elements.
907 /// // Note, however, that this method does not handle namespaces.
908 /// reader.read_to_end(end.name()).unwrap();
909 ///
910 /// // At the end we should get an Eof event, because we ate the whole XML
911 /// assert_eq!(
912 /// reader.read_resolved_event().unwrap(),
913 /// (ResolveResult::Unbound, Event::Eof)
914 /// );
915 /// ```
916 ///
917 /// [`Start`]: Event::Start
918 /// [`End`]: Event::End
919 /// [`IllFormed`]: crate::errors::Error::IllFormed
920 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
921 /// [`expand_empty_elements`]: Config::expand_empty_elements
922 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
923 #[inline]
924 pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
925 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
926 // match literally the start name. See `Config::check_end_names` documentation
927 let result = self.reader.read_to_end(end)?;
928 // read_to_end will consume closing tag. Because nobody can access to its
929 // content anymore, we directly pop namespace of the opening tag
930 self.ns_resolver.pop();
931 Ok(result)
932 }
933
934 /// Reads content between start and end tags, including any markup. This
935 /// function is supposed to be called after you already read a [`Start`] event.
936 ///
937 /// Manages nested cases where parent and child elements have the _literally_
938 /// same name.
939 ///
940 /// This method does not unescape read data, instead it returns content
941 /// "as is" of the XML document. This is because it has no idea what text
942 /// it reads, and if, for example, it contains CDATA section, attempt to
943 /// unescape it content will spoil data.
944 ///
945 /// Any text will be decoded using the XML current [`decoder()`].
946 ///
947 /// Actually, this method perform the following code:
948 ///
949 /// ```ignore
950 /// let span = reader.read_to_end(end)?;
951 /// let text = reader.decoder().decode(&reader.inner_slice[span]);
952 /// ```
953 ///
954 /// # Examples
955 ///
956 /// This example shows, how you can read a HTML content from your XML document.
957 ///
958 /// ```
959 /// # use pretty_assertions::assert_eq;
960 /// # use std::borrow::Cow;
961 /// use quick_xml::events::{BytesStart, Event};
962 /// use quick_xml::reader::NsReader;
963 ///
964 /// let mut reader = NsReader::from_str(r#"
965 /// <html>
966 /// <title>This is a HTML text</title>
967 /// <p>Usual XML rules does not apply inside it
968 /// <p>For example, elements not needed to be "closed"
969 /// </html>
970 /// "#);
971 /// reader.config_mut().trim_text(true);
972 ///
973 /// let start = BytesStart::new("html");
974 /// let end = start.to_end().into_owned();
975 ///
976 /// // First, we read a start event...
977 /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
978 /// // ...and disable checking of end names because we expect HTML further...
979 /// reader.config_mut().check_end_names = false;
980 ///
981 /// // ...then, we could read text content until close tag.
982 /// // This call will correctly handle nested <html> elements.
983 /// let text = reader.read_text(end.name()).unwrap();
984 /// assert_eq!(text, Cow::Borrowed(r#"
985 /// <title>This is a HTML text</title>
986 /// <p>Usual XML rules does not apply inside it
987 /// <p>For example, elements not needed to be "closed"
988 /// "#));
989 ///
990 /// // Now we can enable checks again
991 /// reader.config_mut().check_end_names = true;
992 ///
993 /// // At the end we should get an Eof event, because we ate the whole XML
994 /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
995 /// ```
996 ///
997 /// [`Start`]: Event::Start
998 /// [`decoder()`]: Reader::decoder()
999 #[inline]
1000 pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
1001 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
1002 // match literally the start name. See `Self::check_end_names` documentation
1003 let result = self.reader.read_text(end)?;
1004 // read_text will consume closing tag. Because nobody can access to its
1005 // content anymore, we directly pop namespace of the opening tag
1006 self.ns_resolver.pop();
1007 Ok(result)
1008 }
1009}
1010
1011impl<R> Deref for NsReader<R> {
1012 type Target = Reader<R>;
1013
1014 #[inline]
1015 fn deref(&self) -> &Self::Target {
1016 &self.reader
1017 }
1018}