libxml/readonly/
tree.rs

1use libc::{c_char, c_void};
2use std::collections::{HashMap, HashSet};
3use std::ffi::{CStr, CString};
4use std::ptr;
5use std::str;
6
7use crate::bindings::*;
8use crate::c_helpers::*;
9use crate::tree::namespace::Namespace;
10use crate::tree::nodetype::NodeType;
11use crate::tree::Document;
12use crate::xpath::Context;
13
/// Lightweight, `Copy` handle around a raw libxml2 `xmlNodePtr`, meant for
/// read-only (and therefore parallel) traversal of a document.
///
/// The wrapper stores nothing but the pointer itself.
/// NOTE(review): presumably the owning `Document` must outlive every `RoNode`
/// derived from it — confirm against the callers.
#[derive(Debug, Copy, Clone)]
pub struct RoNode(pub(crate) xmlNodePtr);
17
// SAFETY (as asserted by the original author; not provable from this file alone):
// we claim Sync and Send, as we are in read-only mode over the owning document,
// i.e. a `RoNode` is only expected to be used to read from the tree.
// NOTE(review): nothing here prevents a concurrent mutation through another handle
// to the same document — callers must uphold the read-only discipline.
unsafe impl Sync for RoNode {}
unsafe impl Send for RoNode {}
21
22impl PartialEq for RoNode {
23  /// Two nodes are considered equal, if they point to the same xmlNode.
24  fn eq(&self, other: &RoNode) -> bool {
25    std::ptr::eq(self.0, other.0)
26  }
27}
28impl Eq for RoNode {}
29
30impl RoNode {
  /// Returns the underlying libxml2 `xmlNodePtr`.
  ///
  /// The raw pointer is returned by value (it is copied out of the wrapper);
  /// no Rust borrow is handed to the caller.
  pub fn node_ptr(&self) -> xmlNodePtr {
    self.0
  }
35
36  /// Returns the next sibling if it exists
37  pub fn get_next_sibling(self) -> Option<RoNode> {
38    let ptr = xmlNextSibling(self.0);
39    self.ptr_as_option(ptr)
40  }
41
42  /// Returns the previous sibling if it exists
43  pub fn get_prev_sibling(self) -> Option<RoNode> {
44    let ptr = xmlPrevSibling(self.0);
45    self.ptr_as_option(ptr)
46  }
47
48  /// Returns the first child if it exists
49  pub fn get_first_child(self) -> Option<RoNode> {
50    let ptr = xmlGetFirstChild(self.0);
51    self.ptr_as_option(ptr)
52  }
53
54  /// Returns the last child if it exists
55  pub fn get_last_child(self) -> Option<RoNode> {
56    let ptr = unsafe { xmlGetLastChild(self.0) };
57    self.ptr_as_option(ptr)
58  }
59
60  /// Returns the next element sibling if it exists
61  pub fn get_next_element_sibling(&self) -> Option<RoNode> {
62    match self.get_next_sibling() {
63      None => None,
64      Some(child) => {
65        let mut current_node = child;
66        while !current_node.is_element_node() {
67          if let Some(sibling) = current_node.get_next_sibling() {
68            current_node = sibling;
69          } else {
70            break;
71          }
72        }
73        if current_node.is_element_node() {
74          Some(current_node)
75        } else {
76          None
77        }
78      }
79    }
80  }
81
82  /// Returns the previous element sibling if it exists
83  pub fn get_prev_element_sibling(&self) -> Option<RoNode> {
84    match self.get_prev_sibling() {
85      None => None,
86      Some(child) => {
87        let mut current_node = child;
88        while !current_node.is_element_node() {
89          if let Some(sibling) = current_node.get_prev_sibling() {
90            current_node = sibling;
91          } else {
92            break;
93          }
94        }
95        if current_node.is_element_node() {
96          Some(current_node)
97        } else {
98          None
99        }
100      }
101    }
102  }
103
104  /// Returns the first element child if it exists
105  pub fn get_first_element_child(self) -> Option<RoNode> {
106    match self.get_first_child() {
107      None => None,
108      Some(child) => {
109        let mut current_node = child;
110        while !current_node.is_element_node() {
111          if let Some(sibling) = current_node.get_next_sibling() {
112            current_node = sibling;
113          } else {
114            break;
115          }
116        }
117        if current_node.is_element_node() {
118          Some(current_node)
119        } else {
120          None
121        }
122      }
123    }
124  }
125
126  /// Returns the last element child if it exists
127  pub fn get_last_element_child(&self) -> Option<RoNode> {
128    match self.get_last_child() {
129      None => None,
130      Some(child) => {
131        let mut current_node = child;
132        while !current_node.is_element_node() {
133          if let Some(sibling) = current_node.get_prev_sibling() {
134            current_node = sibling;
135          } else {
136            break;
137          }
138        }
139        if current_node.is_element_node() {
140          Some(current_node)
141        } else {
142          None
143        }
144      }
145    }
146  }
147
148  /// Returns all child nodes of the given node as a vector
149  pub fn get_child_nodes(self) -> Vec<RoNode> {
150    let mut children = Vec::new();
151    if let Some(first_child) = self.get_first_child() {
152      children.push(first_child);
153      while let Some(sibling) = children.last().unwrap().get_next_sibling() {
154        children.push(sibling)
155      }
156    }
157    children
158  }
159
160  /// Returns all child elements of the given node as a vector
161  pub fn get_child_elements(self) -> Vec<RoNode> {
162    self
163      .get_child_nodes()
164      .into_iter()
165      .filter(|n| n.get_type() == Some(NodeType::ElementNode))
166      .collect::<Vec<RoNode>>()
167  }
168
169  /// Returns the parent if it exists
170  pub fn get_parent(self) -> Option<RoNode> {
171    let ptr = xmlGetParent(self.0);
172    self.ptr_as_option(ptr)
173  }
174
  /// Get the node type.
  ///
  /// The raw type reported by `xmlGetNodeType` is converted with
  /// `NodeType::from_int`; the result is `None` when that conversion yields `None`.
  pub fn get_type(self) -> Option<NodeType> {
    NodeType::from_int(xmlGetNodeType(self.0))
  }
179
  /// Returns `true` if this node's type is `NodeType::TextNode`.
  pub fn is_text_node(self) -> bool {
    self.get_type() == Some(NodeType::TextNode)
  }
184
  /// Returns `true` if this node's type is `NodeType::ElementNode`.
  pub fn is_element_node(self) -> bool {
    self.get_type() == Some(NodeType::ElementNode)
  }
189
  /// Checks if the underlying libxml2 pointer is `NULL`
  /// (as is the case for the placeholder built by `RoNode::null()`).
  pub fn is_null(self) -> bool {
    self.0.is_null()
  }
194
195  /// Returns the name of the node (empty string if name pointer is `NULL`)
196  pub fn get_name(self) -> String {
197    let name_ptr = xmlNodeGetName(self.0);
198    if name_ptr.is_null() {
199      return String::new();
200    } //empty string
201    let c_string = unsafe { CStr::from_ptr(name_ptr) };
202    c_string.to_string_lossy().into_owned()
203  }
204
205  /// Returns the content of the node
206  /// (assumes UTF-8 XML document)
207  pub fn get_content(self) -> String {
208    let content_ptr = unsafe { xmlNodeGetContent(self.0) };
209    if content_ptr.is_null() {
210      //empty string when none
211      return String::new();
212    }
213    let c_string = unsafe { CStr::from_ptr(content_ptr as *const c_char) };
214    let rust_utf8 = c_string.to_string_lossy().into_owned();
215    bindgenFree(content_ptr as *mut c_void);
216    rust_utf8
217  }
218
219  /// Returns the value of property `name`
220  pub fn get_property(self, name: &str) -> Option<String> {
221    let c_name = CString::new(name).unwrap();
222    let value_ptr = unsafe { xmlGetProp(self.0, c_name.as_bytes().as_ptr()) };
223    if value_ptr.is_null() {
224      return None;
225    }
226    let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) };
227    let prop_str = c_value_string.to_string_lossy().into_owned();
228    bindgenFree(value_ptr as *mut c_void);
229    Some(prop_str)
230  }
231
232  /// Returns the value of property `name` in namespace `ns`
233  pub fn get_property_ns(self, name: &str, ns: &str) -> Option<String> {
234    let c_name = CString::new(name).unwrap();
235    let c_ns = CString::new(ns).unwrap();
236    let value_ptr =
237      unsafe { xmlGetNsProp(self.0, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr()) };
238    if value_ptr.is_null() {
239      return None;
240    }
241    let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) };
242    let prop_str = c_value_string.to_string_lossy().into_owned();
243    bindgenFree(value_ptr as *mut c_void);
244    Some(prop_str)
245  }
246
247  /// Returns the value of property `name` with no namespace
248  pub fn get_property_no_ns(self, name: &str) -> Option<String> {
249    let c_name = CString::new(name).unwrap();
250    let value_ptr = unsafe { xmlGetNoNsProp(self.0, c_name.as_bytes().as_ptr()) };
251    if value_ptr.is_null() {
252      return None;
253    }
254    let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) };
255    let prop_str = c_value_string.to_string_lossy().into_owned();
256    bindgenFree(value_ptr as *mut c_void);
257    Some(prop_str)
258  }
259
260  /// Return an attribute as a `Node` struct of type AttributeNode
261  pub fn get_property_node(self, name: &str) -> Option<RoNode> {
262    let c_name = CString::new(name).unwrap();
263    unsafe {
264      let attr_node = xmlHasProp(self.0, c_name.as_bytes().as_ptr());
265      self.ptr_as_option(attr_node as xmlNodePtr)
266    }
267  }
268
269  /// Return an attribute in a namespace `ns` as a `Node` of type AttributeNode
270  pub fn get_property_node_ns(self, name: &str, ns: &str) -> Option<RoNode> {
271    let c_name = CString::new(name).unwrap();
272    let c_ns = CString::new(ns).unwrap();
273    let attr_node =
274      unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr()) };
275    self.ptr_as_option(attr_node as xmlNodePtr)
276  }
277
278  /// Return an attribute with no namespace as a `Node` of type AttributeNode
279  pub fn get_property_node_no_ns(self, name: &str) -> Option<RoNode> {
280    let c_name = CString::new(name).unwrap();
281    let attr_node = unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), ptr::null()) };
282    self.ptr_as_option(attr_node as xmlNodePtr)
283  }
284
  /// Alias for `get_property`: returns the value of attribute `name`, if any.
  pub fn get_attribute(self, name: &str) -> Option<String> {
    self.get_property(name)
  }
289
  /// Alias for `get_property_ns`: returns the value of attribute `name` in
  /// namespace `ns`, if any.
  pub fn get_attribute_ns(self, name: &str, ns: &str) -> Option<String> {
    self.get_property_ns(name, ns)
  }
294
  /// Alias for `get_property_no_ns`: returns the value of attribute `name`
  /// with no namespace, if any.
  pub fn get_attribute_no_ns(self, name: &str) -> Option<String> {
    self.get_property_no_ns(name)
  }
299
  /// Alias for `get_property_node`: returns attribute `name` as a `RoNode`, if present.
  pub fn get_attribute_node(self, name: &str) -> Option<RoNode> {
    self.get_property_node(name)
  }
304
  /// Alias for `get_property_node_ns`: returns attribute `name` in namespace
  /// `ns` as a `RoNode`, if present.
  pub fn get_attribute_node_ns(self, name: &str, ns: &str) -> Option<RoNode> {
    self.get_property_node_ns(name, ns)
  }
309
  /// Alias for `get_property_node_no_ns`: returns attribute `name` with no
  /// namespace as a `RoNode`, if present.
  pub fn get_attribute_node_no_ns(self, name: &str) -> Option<RoNode> {
    self.get_property_node_no_ns(name)
  }
314
315  /// Get a copy of the attributes of this node
316  pub fn get_properties(self) -> HashMap<String, String> {
317    let mut attributes = HashMap::new();
318
319    let mut current_prop = xmlGetFirstProperty(self.0);
320    while !current_prop.is_null() {
321      let name_ptr = xmlAttrName(current_prop);
322      let c_name_string = unsafe { CStr::from_ptr(name_ptr) };
323      let name = c_name_string.to_string_lossy().into_owned();
324      let value = self.get_property(&name).unwrap_or_default();
325      attributes.insert(name, value);
326      current_prop = xmlNextPropertySibling(current_prop);
327    }
328
329    attributes
330  }
331
332  /// Get a copy of this node's attributes and their namespaces
333  pub fn get_properties_ns(self) -> HashMap<(String, Option<Namespace>), String> {
334    let mut attributes = HashMap::new();
335
336    let mut current_prop = xmlGetFirstProperty(self.0);
337    while !current_prop.is_null() {
338      let name_ptr = xmlAttrName(current_prop);
339      let c_name_string = unsafe { CStr::from_ptr(name_ptr) };
340      let name = c_name_string.to_string_lossy().into_owned();
341      let ns_ptr = xmlAttrNs(current_prop);
342      if ns_ptr.is_null() {
343        let value = self.get_property_no_ns(&name).unwrap_or_default();
344        attributes.insert((name, None), value);
345      } else {
346        let ns = Namespace { ns_ptr };
347        let value = self
348          .get_property_ns(&name, &ns.get_href())
349          .unwrap_or_default();
350        attributes.insert((name, Some(ns)), value);
351      }
352      current_prop = xmlNextPropertySibling(current_prop);
353    }
354
355    attributes
356  }
357
  /// Alias for `get_properties`: a name → value copy of this node's attributes.
  pub fn get_attributes(self) -> HashMap<String, String> {
    self.get_properties()
  }
362
  /// Alias for `get_properties_ns`: a copy of this node's attributes keyed by
  /// (name, optional namespace).
  pub fn get_attributes_ns(self) -> HashMap<(String, Option<Namespace>), String> {
    self.get_properties_ns()
  }
367
368  /// Check if a property has been defined, without allocating its value
369  pub fn has_property(self, name: &str) -> bool {
370    let c_name = CString::new(name).unwrap();
371    let value_ptr = unsafe { xmlHasProp(self.0, c_name.as_bytes().as_ptr()) };
372    !value_ptr.is_null()
373  }
374
375  /// Check if property `name` in namespace `ns` exists
376  pub fn has_property_ns(self, name: &str, ns: &str) -> bool {
377    let c_name = CString::new(name).unwrap();
378    let c_ns = CString::new(ns).unwrap();
379    let value_ptr =
380      unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr()) };
381    !value_ptr.is_null()
382  }
383
384  /// Check if property `name` with no namespace exists
385  pub fn has_property_no_ns(self, name: &str) -> bool {
386    let c_name = CString::new(name).unwrap();
387    let value_ptr = unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), ptr::null()) };
388    !value_ptr.is_null()
389  }
390
  /// Alias for `has_property`: checks whether attribute `name` is defined.
  pub fn has_attribute(self, name: &str) -> bool {
    self.has_property(name)
  }
395
  /// Alias for `has_property_ns`: checks whether attribute `name` in
  /// namespace `ns` exists.
  pub fn has_attribute_ns(self, name: &str, ns: &str) -> bool {
    self.has_property_ns(name, ns)
  }
400
  /// Alias for `has_property_no_ns`: checks whether attribute `name` with no
  /// namespace exists.
  pub fn has_attribute_no_ns(self, name: &str) -> bool {
    self.has_property_no_ns(name)
  }
405
406  /// Gets the active namespace associated of this node
407  pub fn get_namespace(self) -> Option<Namespace> {
408    let ns_ptr = xmlNodeNs(self.0);
409    if ns_ptr.is_null() {
410      None
411    } else {
412      Some(Namespace { ns_ptr })
413    }
414  }
415
416  /// Gets a list of namespaces associated with this node
417  pub fn get_namespaces(self, doc: &Document) -> Vec<Namespace> {
418    let list_ptr_raw = unsafe { xmlGetNsList(doc.doc_ptr(), self.0) };
419    if list_ptr_raw.is_null() {
420      Vec::new()
421    } else {
422      let mut namespaces = Vec::new();
423      let mut ptr_iter = list_ptr_raw as *mut xmlNsPtr;
424      unsafe {
425        while !ptr_iter.is_null() && !(*ptr_iter).is_null() {
426          namespaces.push(Namespace { ns_ptr: *ptr_iter });
427          ptr_iter = ptr_iter.add(1);
428        }
429        /* TODO: valgrind suggests this technique isn't sufficiently fluent:
430          ==114895== Conditional jump or move depends on uninitialised value(s)
431          ==114895==    at 0x4E9962F: xmlFreeNs (in /usr/lib/x86_64-linux-gnu/libxml2.so.2.9.4)
432          ==114895==    by 0x195CE8: libxml::tree::Node::get_namespaces (tree.rs:723)
433          ==114895==    by 0x12E7B6: base_tests::can_work_with_namespaces (base_tests.rs:537)
434          DG: I could not improve on this state without creating memory leaks after ~1 hour, so I am
435          marking it as future work.
436        */
437        /* TODO: How do we properly deallocate here? The approach bellow reliably segfaults tree_tests on 1 thread */
438        // println!("\n-- xmlfreens on : {:?}", list_ptr_raw);
439        // xmlFreeNs(list_ptr_raw as xmlNsPtr);
440      }
441      namespaces
442    }
443  }
444
445  /// Get a list of namespaces declared with this node
446  pub fn get_namespace_declarations(self) -> Vec<Namespace> {
447    if self.get_type() != Some(NodeType::ElementNode) {
448      // only element nodes can have declarations
449      return Vec::new();
450    }
451    let mut namespaces = Vec::new();
452    let mut ns_ptr = xmlNodeNsDeclarations(self.0);
453    while !ns_ptr.is_null() {
454      if !xmlNsPrefix(ns_ptr).is_null() || !xmlNsHref(ns_ptr).is_null() {
455        namespaces.push(Namespace { ns_ptr });
456      }
457      ns_ptr = xmlNextNsSibling(ns_ptr);
458    }
459    namespaces
460  }
461
462  /// Looks up the prefix of a namespace from its URI, basedo around a given `Node`
463  pub fn lookup_namespace_prefix(self, href: &str) -> Option<String> {
464    if href.is_empty() {
465      return None;
466    }
467    let c_href = CString::new(href).unwrap();
468    unsafe {
469      let ptr_mut = self.0;
470      let ns_ptr = xmlSearchNsByHref(xmlGetDoc(ptr_mut), ptr_mut, c_href.as_bytes().as_ptr());
471      if !ns_ptr.is_null() {
472        let ns = Namespace { ns_ptr };
473        let ns_prefix = ns.get_prefix();
474        Some(ns_prefix)
475      } else {
476        None
477      }
478    }
479  }
480
481  /// Looks up the uri of a namespace from its prefix, basedo around a given `Node`
482  pub fn lookup_namespace_uri(self, prefix: &str) -> Option<String> {
483    if prefix.is_empty() {
484      return None;
485    }
486    let c_prefix = CString::new(prefix).unwrap();
487    unsafe {
488      let ns_ptr = xmlSearchNs(xmlGetDoc(self.0), self.0, c_prefix.as_bytes().as_ptr());
489      if !ns_ptr.is_null() {
490        let ns = Namespace { ns_ptr };
491        let ns_prefix = ns.get_href();
492        if !ns_prefix.is_empty() {
493          Some(ns_prefix)
494        } else {
495          None
496        }
497      } else {
498        None
499      }
500    }
501  }
502
503  /// Get a set of class names from this node's attributes
504  pub fn get_class_names(self) -> HashSet<String> {
505    let mut set = HashSet::new();
506    if let Some(value) = self.get_property("class") {
507      for n in value.split(' ') {
508        set.insert(n.to_owned());
509      }
510    }
511    set
512  }
513
  /// Finds read-only nodes via XPath, evaluated relative to this node within
  /// the document `owner`.
  ///
  /// # Errors
  /// Returns `Err(())` when creating the XPath `Context` for `owner` fails or
  /// when evaluating `xpath` fails.
  pub fn findnodes(self, xpath: &str, owner: &Document) -> Result<Vec<RoNode>, ()> {
    // The context is kept in a named local so it lives until the end of the
    // function, i.e. longer than the evaluation result below.
    let context = Context::new(owner)?;
    let evaluated = context.node_evaluate_readonly(xpath, self)?;
    Ok(evaluated.get_readonly_nodes_as_vec())
  }
520
  /// Read-only nodes are always linked, so this always returns `false`.
  pub fn is_unlinked(self) -> bool {
    false
  }
525  /// Read-only nodes only need a null check
526  fn ptr_as_option(self, node_ptr: xmlNodePtr) -> Option<RoNode> {
527    if node_ptr.is_null() {
528      None
529    } else {
530      Some(RoNode(node_ptr))
531    }
532  }
533
  /// Returns the wrapped pointer's address as a `usize`, for use as a hash key.
  ///
  /// `libc::c_void` isn't hashable and cannot be made hashable, hence the
  /// conversion to an integer.
  pub fn to_hashable(self) -> usize {
    self.0 as usize
  }
  /// Creates a mock node wrapping a `NULL` pointer, used as a placeholder
  /// argument. `is_null()` returns `true` for the result.
  pub fn null() -> Self {
    RoNode(ptr::null_mut())
  }
542}