1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
//! Module for managing `XmlnsReader` iterator

use {XmlReader, Event, Element};
use error::ResultPos;
use std::io::BufRead;

/// One `xmlns` / `xmlns:prefix` declaration currently tracked by the reader.
///
/// A declaration either binds a namespace name to a prefix or sets the default namespace.
#[derive(Clone)]
struct Namespace {
    /// `Some(prefix)` for an `xmlns:prefix="..."` binding, `None` for a default namespace
    /// declaration (`xmlns="..."`).
    prefix: Option<Vec<u8>>,
    /// The namespace name (URI) that was declared.
    ///
    /// `None` stands for an empty attribute value. Per the XML namespaces specification an
    /// empty value *undeclares* the namespace for the extent of its scope. This matters most
    /// for the default namespace: `xmlns=""` switches back to leaving unqualified element
    /// names without any namespace.
    value: Option<Vec<u8>>,
    /// Nesting depth of the element carrying this declaration, counting that element itself
    /// (a declaration on the document root has `level == 1`). Used to drop the declaration
    /// once its element is closed.
    level: i32,
}

impl Namespace {
    /// Tells whether the **qualified name** `name` uses the prefix bound by this declaration,
    /// i.e. whether `name` is `<prefix>:<anything>`.
    ///
    /// Default namespace declarations never match here; unqualified element names are handled
    /// by `matches_unqualified_elem`.
    ///
    /// [W3C Namespaces in XML 1.1 (2006)](http://w3.org/TR/xml-names11/#scoping-defaulting)
    #[inline]
    fn matches_qualified(&self, name: &[u8]) -> bool {
        match self.prefix {
            // A default namespace declaration has no prefix to compare against.
            None => false,
            // The name must start with the prefix and the very next byte must be the colon.
            Some(ref bound) => name.starts_with(bound) && name.get(bound.len()) == Some(&b':'),
        }
    }

    /// Tells whether this declaration applies to unqualified element names, which is the case
    /// exactly when it is a default namespace declaration (it carries no prefix).
    ///
    /// [W3C Namespaces in XML 1.1 (2006)](http://w3.org/TR/xml-names11/#scoping-defaulting)
    #[inline]
    fn matches_unqualified_elem(&self) -> bool {
        match self.prefix {
            Some(_) => false,
            None => true,
        }
    }
}

/// `XmlnsReader` iterator which wraps an `XmlReader` iterator and
/// adds namespace resolution.
///
/// Every event is yielded together with the namespace resolved for it: the namespace of the
/// element for `Start`, `Empty` and `End` events, `None` for every other event.
///
/// # Example
///
/// ```
/// use quick_xml::{XmlReader, Event};
/// use quick_xml::namespace::XmlnsReader;
///
/// let xml = r#"<tag1 att1 = "test">
///                 <tag2><!--Test comment-->Test</tag2>
///                 <tag2>Test 2</tag2>
///             </tag1>"#;
/// let mut reader = XmlReader::from(xml).trim_text(true)
///                  .namespaced();
/// let mut count = 0;
/// let mut txt = Vec::new();
/// // need to use `while let` in order to have access to `reader.resolve`
/// // for attributes namespaces
/// while let Some(r) = reader.next() {
///     match r {
///         // XmlnsReader iterates ResultPos<(Option<Vec<u8>>, Event)> with
///         // the Option<Vec<u8>> being the resolved Namespace, if any
///         Ok((ref n, Event::Start(ref e))) => {
///             match e.name() {
///                 b"tag1" => println!("attributes keys: {:?}",
///                                  e.attributes()
///                                  // use `reader.resolve` to get attribute
///                                  // namespace resolution
///                                  .map(|a| reader.resolve(a.unwrap().0))
///                                  .collect::<Vec<_>>()),
///                 b"tag2" => count += 1,
///                 _ => (),
///             }
///         },
///         Ok((_, Event::Text(e))) => txt.push(e.into_string()),
///         Err((e, pos)) => panic!("{:?} at position {}", e, pos),
///         _ => (),
///     }
/// }
/// ```
#[derive(Clone)]
pub struct XmlnsReader<R: BufRead> {
    /// The wrapped reader that produces the raw events.
    reader: XmlReader<R>,
    /// Stack of the namespace declarations currently in scope. New declarations are pushed at
    /// the end, so lookups walk it back to front to find the innermost declaration first.
    namespaces: Vec<Namespace>,
    /// The number of open tags at the moment. We need to keep track of this to know which namespace
    /// declarations to remove when we encounter an `End` event.
    nesting_level: i32,
    /// For `Empty` events keep the 'scope' of the element on the stack artificially. That way, the
    /// consumer has a chance to use `resolve` in the context of the empty element. We perform the
    /// pop as the first operation in the next `next()` call.
    pending_pop: bool
}

impl<R: BufRead> XmlnsReader<R> {
    /// Converts a `XmlReader` into a `XmlnsReader` iterator
    pub fn new(reader: XmlReader<R>) -> XmlnsReader<R> {
        XmlnsReader {
            reader: reader,
            namespaces: Vec::new(),
            nesting_level: 0,
            pending_pop: false
        }
    }

    /// Resolves a potentially qualified **attribute name** into (namespace name, local name).
    ///
    /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined
    /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
    /// can be defined on the same element as the attribute in question.
    ///
    /// *Unqualified* attribute names do *not* inherit the current *default namespace*.
    pub fn resolve<'a, 'b>(&'a self, qname: &'b [u8]) 
        -> (Option<&'a [u8]>, &'b [u8]) 
    {
        // Unqualified attributes don't inherit the default namespace. We don't need to search the
        // namespace declaration stack for those.
        if !qname.contains(&b':') {
            return (None, qname)
        }

        match self.namespaces.iter().rev().find(|ref n| n.matches_qualified(qname)) {
            // Found closest matching namespace declaration `n`. The `unwrap` is fine because
            // `is_match_attr` doesn't return default namespace declarations.
            Some(&Namespace { ref prefix, value: Some(ref value), .. }) =>
                (Some(&value[..]), &qname[(prefix.as_ref().unwrap().len() + 1)..]),
            Some(&Namespace { ref prefix, value: None, .. }) =>
                (None, &qname[(prefix.as_ref().unwrap().len() + 1)..]),
            None => (None, qname),
        }
    }

    fn find_namespace_value(&self, e: &Element) -> Option<Vec<u8>> {
        // We pulled the qualified-vs-unqualified check out here so that it doesn't happen for each
        // namespace we are comparing against.
        let element_name = e.name();
        if element_name.contains(&b':') {
            // qualified name
            self.namespaces
                .iter()
                .rev() // iterate in reverse order to find the most recent one
                .find(|ref n| n.matches_qualified(element_name))
                .and_then(|ref n| n.value.as_ref().map(|ns| ns.clone()))
        } else {
            // unqualified name (inherits current default namespace)
            self.namespaces
                .iter()
                .rev() // iterate in reverse order to find the most recent one
                .find(|ref n| n.matches_unqualified_elem())
                .and_then(|ref n| n.value.as_ref().map(|ns| ns.clone()))
        }
    }

    fn pop_empty_namespaces(&mut self) {
        let current_level = self.nesting_level;
        // from the back (most deeply nested scope), look for the first scope that is still valid
        match self.namespaces.iter().rposition(|n| n.level <= current_level) {
            // none of the namespaces are valid, remove all of them
            None => self.namespaces.clear(),
            // drop all namespaces past the last valid namespace
            Some(last_valid_pos) => self.namespaces.truncate(last_valid_pos + 1)
        }
    }

    fn push_new_namespaces(&mut self, e: &Element) {
        // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns'
        // (default namespace) attribute.
        for a in e.attributes().with_checks(false) {
            if let Ok((k, v)) = a {
                // Check for 'xmlns:any-prefix' and 'xmlns' at the same time:
                if k.len() >= 5 && &k[..5] == b"xmlns" && (k.len() == 5 || k[5] == b':') {
                    // We use an None prefix as the 'name' for the default namespace.
                    // That saves an allocation compared to an empty namespace name.
                    let prefix = if k.len() == 5 { None } else { Some(k[6..].to_vec()) };
                    let ns_value = if v.len() == 0 { None } else { Some(v.to_vec()) };
                    self.namespaces.push(Namespace {
                        prefix: prefix,
                        value: ns_value,
                        level: self.nesting_level,
                    });
                }
            } else {
                break;
            }
        }
    }
}

impl<R: BufRead> Iterator for XmlnsReader<R> {
    type Item = ResultPos<(Option<Vec<u8>>, Event)>;

    /// Pulls the next event from the wrapped reader and pairs it with the namespace resolved
    /// for it.
    ///
    /// `Start`, `Empty` and `End` events carry the namespace of their element, if any; every
    /// other event carries `None`. Errors and the end of the stream are passed through as is.
    fn next(&mut self) -> Option<Self::Item> {
        // Leave the scope of an empty element handed out by the previous call.
        if self.pending_pop {
            self.pending_pop = false;
            self.nesting_level -= 1;
            self.pop_empty_namespaces();
        }

        let event = match self.reader.next() {
            None => return None,
            Some(Err(err)) => return Some(Err(err)),
            Some(Ok(event)) => event,
        };

        let namespace = match event {
            Event::Start(ref elem) => {
                self.nesting_level += 1;
                self.push_new_namespaces(elem);
                self.find_namespace_value(elem)
            }
            Event::Empty(ref elem) => {
                // An empty element opens and closes its scope at once, but the scope is kept
                // on the stack until the next `next()` call. Otherwise the caller could never
                // use `resolve` with the declarations that are in scope for this element
                // alone. Ex: <img rdf:nodeID="abc" xmlns:rdf="urn:the-rdf-uri" />
                self.nesting_level += 1;
                self.push_new_namespaces(elem);
                // Tell the next `next()` invocation to pop this scope.
                self.pending_pop = true;
                self.find_namespace_value(elem)
            }
            Event::End(ref elem) => {
                // Resolve *before* popping: with shadowed prefixes or a redefined default
                // namespace, the end tag must be resolved in the scope of its own element,
                // not in the outer one.
                let resolved = self.find_namespace_value(elem);
                self.nesting_level -= 1;
                self.pop_empty_namespaces();
                // It could be argued that the pop should be deferred to the next `next()` call
                // here as well, since the end tag technically still belongs to the scope of
                // its element. Not sure that behaviour would be intuitive, though.
                resolved
            }
            _ => None,
        };

        Some(Ok((namespace, event)))
    }
}