1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//! Entity resolver module
use Infallible;
use Error;
use crateresolve_predefined_entity;
use crateBytesText;
/// Used to resolve unknown entities while parsing
///
/// # Example
///
/// ```
/// # use serde::Deserialize;
/// # use pretty_assertions::assert_eq;
/// use regex::bytes::Regex;
/// use std::collections::BTreeMap;
/// use std::string::FromUtf8Error;
/// use quick_xml::de::{Deserializer, EntityResolver};
/// use quick_xml::events::BytesText;
///
/// struct DocTypeEntityResolver {
///     re: Regex,
///     map: BTreeMap<String, String>,
/// }
///
/// impl Default for DocTypeEntityResolver {
///     fn default() -> Self {
///         Self {
///             // This example does not attempt real DTD parsing;
///             // use a dedicated library if you need to parse a DTD.
///             re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#).unwrap(),
///             map: BTreeMap::new(),
///         }
///     }
/// }
///
/// impl EntityResolver for DocTypeEntityResolver {
///     type Error = FromUtf8Error;
///
///     fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> {
///         for cap in self.re.captures_iter(&doctype) {
///             self.map.insert(
///                 String::from_utf8(cap[1].to_vec())?,
///                 String::from_utf8(cap[2].to_vec())?,
///             );
///         }
///         Ok(())
///     }
///
///     fn resolve(&self, entity: &str) -> Option<&str> {
///         self.map.get(entity).map(|s| s.as_str())
///     }
/// }
///
/// let xml_reader = br#"
/// <!DOCTYPE dict[ <!ENTITY e1 "entity 1"> ]>
/// <root>
/// <entity_one>&e1;</entity_one>
/// </root>
/// "#.as_ref();
///
/// let mut de = Deserializer::with_resolver(
///     xml_reader,
///     DocTypeEntityResolver::default(),
/// );
/// let data: BTreeMap<String, String> = BTreeMap::deserialize(&mut de).unwrap();
///
/// assert_eq!(data.get("entity_one"), Some(&"entity 1".to_string()));
/// ```
/// An [`EntityResolver`] that resolves only predefined entities:
///
/// | Entity | Resolution
/// |--------|------------
/// |`&lt;`  | `<`
/// |`&gt;`  | `>`
/// |`&amp;` | `&`
/// |`&apos;`| `'`
/// |`&quot;`| `"`
;