xml_doc/
document.rs

1use crate::element::{Element, ElementData};
2use crate::error::{Error, Result};
3use crate::parser::{DocumentParser, ReadOptions};
4use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
5use quick_xml::Writer;
6use std::fs::File;
7use std::io::{Read, Write};
8use std::path::Path;
9use std::str::FromStr;
10
/// Represents an XML node.
///
/// A node is either an [`Element`] or one of several kinds of character
/// content, each of which owns its text as a `String`.
#[derive(Debug)]
pub enum Node {
    /// XML Element
    ///
    /// Element methods take the owning [`Document`] as an argument, so the
    /// value stored here is only meaningful together with that document.
    Element(Element),
    /// XML Character Data ([specification](https://www.w3.org/TR/xml/#syntax))
    Text(String),
    /// Comments ([specification](https://www.w3.org/TR/xml/#sec-comments))
    ///
    /// Does not contribute to [`Node::text_content`].
    Comment(String),
    /// CDATA ([specification](https://www.w3.org/TR/xml/#sec-cdata-sect))
    CData(String),
    /// Processing Instruction ([specification](https://www.w3.org/TR/xml/#sec-pi))
    PI(String),
    /// Document Type Declaration ([specification](https://www.w3.org/TR/xml/#sec-prolog-dtd))
    ///
    /// Does not contribute to [`Node::text_content`].
    DocType(String),
}
27
28impl Node {
29    /// Useful to use inside `filter_map`.
30    ///
31    /// ```
32    /// use xml_doc::{Document, Element};
33    ///
34    /// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
35    /// <config>
36    ///     Random Text
37    ///     <max>1</max>
38    /// </config>
39    /// "#).unwrap();
40    ///
41    /// let elems: Vec<Element> = doc
42    ///     .root_element()
43    ///     .unwrap()
44    ///     .children(&doc)
45    ///     .iter()
46    ///     .filter_map(|n| n.as_element())
47    ///     .collect();
48    /// ```
49    pub fn as_element(&self) -> Option<Element> {
50        match self {
51            Self::Element(elem) => Some(*elem),
52            _ => None,
53        }
54    }
55
56    pub(crate) fn build_text_content<'a>(&self, doc: &'a Document, buf: &'a mut String) {
57        match self {
58            Node::Element(elem) => elem.build_text_content(doc, buf),
59            Node::Text(text) => buf.push_str(text),
60            Node::CData(text) => buf.push_str(text),
61            Node::PI(text) => buf.push_str(text),
62            _ => {}
63        }
64    }
65
66    /// Returns content if node is `Text`, `CData`, or `PI`.
67    /// If node is `Element`, return [Element::text_content()]
68    ///
69    /// Implementation of [Node.textContent](https://developer.mozilla.org/en-US/docs/Web/API/Node/textContent)
70    pub fn text_content(&self, doc: &Document) -> String {
71        let mut buf = String::new();
72        self.build_text_content(doc, &mut buf);
73        buf
74    }
75}
76
/// Represents an XML document or a document fragment.
///
/// To build a document from scratch, use [`Document::new`].
///
/// To read and modify an existing document, use [parse_*](`Document#parsing`) methods.
///
/// To write the document, use [write_*](`Document#writing`) methods.
///
/// # Examples
/// ```
/// use xml_doc::Document;
///
/// let mut doc = Document::parse_str(r#"<?xml version="1.0" encoding="UTF-8"?>
/// <package>
///     <metadata>
///         <author>Lewis Carol</author>
///     </metadata>
/// </package>
/// "#).unwrap();
/// let author_elem = doc
///   .root_element()
///   .unwrap()
///   .find(&doc, "metadata")
///   .unwrap()
///   .find(&doc, "author")
///   .unwrap();
/// author_elem.set_text_content(&mut doc, "Lewis Carroll");
/// let xml = doc.write_str();
/// ```
#[derive(Debug)]
pub struct Document {
    /// Element counter; starts at 1 in [`Document::new`] because the
    /// container occupies id 0.
    pub(crate) counter: usize, // == self.store.len()
    /// Backing storage for element data; the first entry is the container's data.
    pub(crate) store: Vec<ElementData>,
    /// Invisible element whose children are the document's root nodes.
    container: Element,

    /// XML version emitted in the XML declaration when writing (`"1.0"` for a new document).
    pub(crate) version: String,
    /// When `true`, the written XML declaration carries `standalone="yes"`;
    /// when `false`, the attribute is omitted.
    pub(crate) standalone: bool,
}
117
118impl Document {
119    /// Create a blank new xml document.
120    pub fn new() -> Document {
121        let (container, container_data) = Element::container();
122        Document {
123            counter: 1, // because container is id 0
124            store: vec![container_data],
125            container,
126            version: String::from("1.0"),
127            standalone: false,
128        }
129    }
130
131    /// Get 'container' element of Document.
132    ///
133    /// The document uses an invisible 'container' element
134    /// which it uses to manage its root nodes.
135    ///
136    /// Its parent is None, and trying to change its parent will
137    /// return [`Error::ContainerCannotMove`].
138    ///
139    /// For the container element, only its `children` is relevant.
140    /// Other attributes are not used.
141    pub fn container(&self) -> Element {
142        self.container
143    }
144
145    /// Returns `true` if document doesn't have any nodes.
146    /// Returns `false` if you added a node or parsed an xml.
147    ///
148    /// You can only call `parse_*()` if document is empty.
149    pub fn is_empty(&self) -> bool {
150        self.store.len() == 1
151    }
152
153    /// Get root nodes of document.
154    pub fn root_nodes(&self) -> &Vec<Node> {
155        self.container.children(self)
156    }
157
158    /// Get first root node that is an element.
159    pub fn root_element(&self) -> Option<Element> {
160        self.container.child_elements(self).get(0).copied()
161    }
162
163    /// Push a node to end of root nodes.
164    /// If doc has no [`Element`], pushing a [`Node::Element`] is
165    /// equivalent to setting it as root element.
166    pub fn push_root_node(&mut self, node: Node) -> Result<()> {
167        let elem = self.container;
168        elem.push_child(self, node)
169    }
170}
171
172/// &nbsp;
173/// # Parsing
174///
175/// Below are methods for parsing xml.
176/// Parsing from string, file, and reader is supported.
177///
178/// Call `parse_*_with_opts` with custom [`ReadOptions`] to change parser behaviour.
179/// Otherwise, [`ReadOptions::default()`] is used.
180///
181/// # Errors
182/// - [`Error::CannotDecode`]: Could not decode XML. XML declaration may have invalid encoding value.
183/// - [`Error::MalformedXML`]: Could not read XML.
184/// - [`Error::Io`]: IO Error
185impl Document {
186    pub fn parse_str(str: &str) -> Result<Document> {
187        DocumentParser::parse_reader(str.as_bytes(), ReadOptions::default())
188    }
189    pub fn parse_str_with_opts(str: &str, opts: ReadOptions) -> Result<Document> {
190        DocumentParser::parse_reader(str.as_bytes(), opts)
191    }
192
193    pub fn parse_file<P: AsRef<Path>>(path: P) -> Result<Document> {
194        let file = File::open(path)?;
195        DocumentParser::parse_reader(file, ReadOptions::default())
196    }
197    pub fn parse_file_with_opts<P: AsRef<Path>>(path: P, opts: ReadOptions) -> Result<Document> {
198        let file = File::open(path)?;
199        DocumentParser::parse_reader(file, opts)
200    }
201
202    pub fn parse_reader<R: Read>(reader: R) -> Result<Document> {
203        DocumentParser::parse_reader(reader, ReadOptions::default())
204    }
205    pub fn parse_reader_with_opts<R: Read>(reader: R, opts: ReadOptions) -> Result<Document> {
206        DocumentParser::parse_reader(reader, opts)
207    }
208}
209
/// Options when writing XML.
///
/// Implements [`Default`], so individual fields can be overridden with
/// struct-update syntax:
/// `WriteOptions { indent_size: 4, ..Default::default() }`.
#[derive(Debug, Clone)]
pub struct WriteOptions {
    /// Byte character to indent with. (default: `b' '`)
    pub indent_char: u8,
    /// How many indent_char should be used for indent. (default: 2)
    pub indent_size: usize,
    /// XML declaration should be written at the top. (default: `true`)
    pub write_decl: bool,
}

impl Default for WriteOptions {
    /// Returns the default options: indent with two spaces and write the
    /// XML declaration.
    ///
    /// Existing `WriteOptions::default()` call sites keep working because
    /// `Default` is in the prelude.
    fn default() -> Self {
        Self {
            indent_char: b' ',
            indent_size: 2,
            write_decl: true,
        }
    }
}
229
230/// &nbsp;
231/// # Writing
232///
233/// Below are methods for writing xml.
234/// The XML will be written in UTF-8.
235impl Document {
236    pub fn write_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
237        self.write_file_with_opts(path, WriteOptions::default())
238    }
239    pub fn write_file_with_opts<P: AsRef<Path>>(&self, path: P, opts: WriteOptions) -> Result<()> {
240        let mut file = File::open(path)?;
241        self.write_with_opts(&mut file, opts)
242    }
243
244    pub fn write_str(&self) -> Result<String> {
245        self.write_str_with_opts(WriteOptions::default())
246    }
247    pub fn write_str_with_opts(&self, opts: WriteOptions) -> Result<String> {
248        let mut buf: Vec<u8> = Vec::with_capacity(200);
249        self.write_with_opts(&mut buf, opts)?;
250        Ok(String::from_utf8(buf)?)
251    }
252
253    pub fn write(&self, writer: &mut impl Write) -> Result<()> {
254        self.write_with_opts(writer, WriteOptions::default())
255    }
256    pub fn write_with_opts(&self, writer: &mut impl Write, opts: WriteOptions) -> Result<()> {
257        let container = self.container();
258        let mut writer = Writer::new_with_indent(writer, opts.indent_char, opts.indent_size);
259        if opts.write_decl {
260            self.write_decl(&mut writer)?;
261        }
262        self.write_nodes(&mut writer, container.children(self))?;
263        writer.write_event(Event::Eof)?;
264        Ok(())
265    }
266
267    fn write_decl(&self, writer: &mut Writer<impl Write>) -> Result<()> {
268        let standalone = match self.standalone {
269            true => Some("yes".as_bytes()),
270            false => None,
271        };
272        writer.write_event(Event::Decl(BytesDecl::new(
273            self.version.as_bytes(),
274            Some("UTF-8".as_bytes()),
275            standalone,
276        )))?;
277        Ok(())
278    }
279
280    fn write_nodes(&self, writer: &mut Writer<impl Write>, nodes: &[Node]) -> Result<()> {
281        for node in nodes {
282            match node {
283                Node::Element(eid) => self.write_element(writer, *eid)?,
284                Node::Text(text) => {
285                    writer.write_event(Event::Text(BytesText::from_plain_str(text)))?
286                }
287                Node::DocType(text) => writer.write_event(Event::DocType(
288                    BytesText::from_plain_str(&format!(" {}", text)), // add a whitespace before text
289                ))?,
290                // Comment, CData, and PI content is not escaped.
291                Node::Comment(text) => {
292                    writer.write_event(Event::Comment(BytesText::from_escaped_str(text)))?
293                }
294                Node::CData(text) => {
295                    writer.write_event(Event::CData(BytesText::from_escaped_str(text)))?
296                }
297                Node::PI(text) => {
298                    writer.write_event(Event::PI(BytesText::from_escaped_str(text)))?
299                }
300            };
301        }
302        Ok(())
303    }
304
305    fn write_element(&self, writer: &mut Writer<impl Write>, element: Element) -> Result<()> {
306        let name_bytes = element.full_name(self).as_bytes();
307        let mut start = BytesStart::borrowed_name(name_bytes);
308        for (key, val) in element.attributes(self) {
309            let val = quick_xml::escape::escape(val.as_bytes());
310            start.push_attribute((key.as_bytes(), &val[..]));
311        }
312        for (prefix, val) in element.namespace_decls(self) {
313            let attr_name = if prefix.is_empty() {
314                "xmlns".to_string()
315            } else {
316                format!("xmlns:{}", prefix)
317            };
318            let val = quick_xml::escape::escape(val.as_bytes());
319            start.push_attribute((attr_name.as_bytes(), &val[..]));
320        }
321        if element.has_children(self) {
322            writer.write_event(Event::Start(start))?;
323            self.write_nodes(writer, element.children(self))?;
324            writer.write_event(Event::End(BytesEnd::borrowed(name_bytes)))?;
325        } else {
326            writer.write_event(Event::Empty(start))?;
327        }
328        Ok(())
329    }
330}
331
332impl FromStr for Document {
333    type Err = Error;
334
335    fn from_str(s: &str) -> Result<Document> {
336        Document::parse_str(s)
337    }
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// Pushing a new element onto an existing one makes it that element's
    /// last child and sets its parent.
    #[test]
    fn test_add_element() {
        let source = r#"<?xml version="1.0" encoding="UTF-8"?>
        <basic>
            Text
            <c />
        </basic>
        "#;
        let mut doc = Document::from_str(source).unwrap();
        let root = doc.container().children(&doc)[0].as_element().unwrap();
        let added = Element::new(&mut doc, "p");
        root.push_child(&mut doc, Node::Element(added)).unwrap();
        assert_eq!(added.parent(&doc).unwrap(), root);

        let last_child = root.children(&doc).last().unwrap().as_element().unwrap();
        assert_eq!(added, last_child)
    }
}
362}