Enum nom_xml::Document

source ·
pub enum Document {
    Prolog {
        xml_decl: Option<XmlDecl>,
        misc: Option<Vec<Misc>>,
        doc_type: Option<DocType>,
    },
    Element(Tag, Box<Document>, Tag),
    Content(Option<String>),
    Nested(Vec<Document>),
    Empty,
    EmptyTag(Tag),
    ProcessingInstruction(ProcessingInstruction),
    Comment(String),
    CDATA(String),
}
Expand description

Main entry point for parsing XML documents

This enum encapsulates all of the top level types that comprise an XML document. The core variant is the Element(Tag,Box<Document>,Tag) type which allows recursive parsing of nested tags and their content.

Variants§

§

Prolog

Fields

§xml_decl: Option<XmlDecl>
§misc: Option<Vec<Misc>>
§doc_type: Option<DocType>
§

Element(Tag, Box<Document>, Tag)

§

Content(Option<String>)

§

Nested(Vec<Document>)

§

Empty

§

EmptyTag(Tag)

§

ProcessingInstruction(ProcessingInstruction)

§

Comment(String)

§

CDATA(String)

Implementations§

source§

impl Document

source

pub fn parse_prolog<'a>( input: &'a str, entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>, config: &'a Config, ) -> IResult<&'a str, (Option<Document>, Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>)>

source

pub fn parse_element_by_tag_name<'a>( input: &'a str, tag_name: &'a str, attributes: &Option<Vec<Attribute>>, ) -> IResult<&'a str, Document>

The main interface for parsing the first element that matches criteria

See the parse_first_matching_element example for more information

Run with cargo run --example parse_first_matching_element

Also see the parse_element_with_specific_attribute_value example

Run with cargo run --example parse_element_with_specific_attribute_value

Examples found in repository?
examples/parse_first_matching_element.rs (line 10)
7
8
9
10
11
12
13
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;
    let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
    println!("{doc:?}");
    Ok(())
}
More examples
Hide additional examples
examples/parse_element_with_specific_attribute_value.rs (lines 11-15)
8
9
10
11
12
13
14
15
16
17
18
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;
    let (_, doc) = Document::parse_element_by_tag_name(
        &data,
        "book",
        &Some(vec![Attribute::new("isbn", "978-0316332910")]),
    )?;
    println!("{doc:?}");
    Ok(())
}
examples/extract_information_manual.rs (line 165)
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;
    let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
    let mut book = Book::default();

    doc.iter_with_depth(0)
        .filter_map(|element| {
            if let Document::Element(tag, inner_doc, _) = element {
                Some((tag, inner_doc))
            } else {
                None
            }
        })
        .try_for_each(|(tag, inner_doc)| book.update_field(tag, inner_doc))?;

    println!("{book:#?}");
    Ok(())
}
examples/extract_information_derived.rs (line 40)
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;
    let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
    let mut book = Book::default();
    // doc.iter_with_depth(0)
    //     .filter_map(|record| {
    //         if let Document::Element(tag, inner_doc, _) = record {
    //             Some((tag, inner_doc))
    //         } else {
    //             None
    //         }
    //     })
    //     .try_for_each(|(tag, inner_doc)| book.update_field(tag, inner_doc))?;
    // book.update_attribute_fields(&doc);
    book.update_fields(&doc)?;
    println!("{book:#?}");
    Ok(())
}
examples/extract_information_derived_catalog.rs (line 46)
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;
    let (_, doc) = Document::parse_element_by_tag_name(&data, "catalog", &None)?;
    let mut books = Books::default();

    doc.iter_with_depth(0)
        .filter_map(|element| {
            if let Document::Element(tag, inner_doc, _) = element {
                Some((tag, inner_doc))
            } else {
                None
            }
        })
        .try_for_each(|(tag, inner_doc)| books.update_field(tag, inner_doc))
        .map_err(|e| {
            println!("Error updating field: {}", e);
            e
        })?;

    println!("{books:#?}");
    Ok(())
}
examples/extract_information_manual_catalog.rs (line 205)
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;
    let (_, doc) = Document::parse_element_by_tag_name(&data, "catalog", &None)?;
    let mut books = Books::default();

    doc.iter_with_depth(0)
        .filter_map(|element| {
            if let Document::Element(tag, inner_doc, _) = element {
                Some((tag, inner_doc))
            } else {
                None
            }
        })
        .try_for_each(|(tag, inner_doc)| books.update_field(tag, inner_doc))
        .map_err(|e| {
            println!("Error updating field: {}", e);
            e
        })?;

    println!("{books:#?}");
    Ok(())
}
source

pub fn parse_elements_by_tag_name<'a>( input: &'a str, tag_name: &'a str, attributes: &Option<Vec<Attribute>>, ) -> IResult<&'a str, Vec<Document>>

The main interface for parsing many elements with the same tag name

See the parse_all_of_specific_tag example for more information

Run with cargo run --example parse_all_of_specific_tag

Examples found in repository?
examples/parse_all_of_specific_tag.rs (line 12)
8
9
10
11
12
13
14
15
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;

    let (_, doc) = Document::parse_elements_by_tag_name(&data, "book", &None)?;
    println!("{doc:?}");
    Ok(())
}
source§

impl Document

source

pub fn iter_with_depth(&self, max_level: usize) -> DocumentIterator<'_>

The main interface for exracting content from the Document tree See the extract_information_manual example for more information

Examples found in repository?
examples/extract_information_derived_catalog.rs (line 49)
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;
    let (_, doc) = Document::parse_element_by_tag_name(&data, "catalog", &None)?;
    let mut books = Books::default();

    doc.iter_with_depth(0)
        .filter_map(|element| {
            if let Document::Element(tag, inner_doc, _) = element {
                Some((tag, inner_doc))
            } else {
                None
            }
        })
        .try_for_each(|(tag, inner_doc)| books.update_field(tag, inner_doc))
        .map_err(|e| {
            println!("Error updating field: {}", e);
            e
        })?;

    println!("{books:#?}");
    Ok(())
}
More examples
Hide additional examples
examples/extract_information_manual.rs (line 64)
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
    fn update_field(
        &mut self,
        tag: &Tag,
        doc: &Document,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let field_name = &tag.name.local_part;

        if let Some(attributes_vec) = &tag.attributes {
            for attr in attributes_vec.iter() {
                if let Attribute::Instance {
                    name,
                    value: AttributeValue::Value(attr_val),
                } = attr
                {
                    if name.local_part == "isbn" {
                        self.isbn = attr_val.to_string();
                    }
                }
            }
        }

        match &doc {
            Document::Content(Some(value)) => match field_name.as_str() {
                "title" => {
                    self.title = value.to_string();
                }
                "genre" => {
                    self.genre = value.to_string();
                }
                "type" => {
                    self.ty = value.to_string();
                }
                "series_number" => {
                    self.series_number = value.parse().unwrap_or_default();
                }
                "description" => {
                    self.description = value.to_string();
                }
                e => {
                    return Err(format!("Unknown field2: {}", e).into());
                }
            },
            Document::Nested(_) => {
                for element in doc.iter_with_depth(1) {
                    if let Document::Element(tag, inner_doc, _) = element {
                        if "authored_by" == tag.name.local_part {
                            self.authored_by.update_fields(inner_doc)?;
                        } else {
                            self.update_field(tag, inner_doc)?;
                        }
                    } else {
                        return Err(format!("Unknown field: {element:#?}").into());
                    }
                }
            }

            _ => {
                return Err("Content is missing".into());
            }
        }

        Ok(())
    }
}

#[derive(Debug, Default, Clone)]
struct AuthoredBy {
    pen_name: String,
    authors: Vec<AuthorName>,
}

impl UpdateFields for AuthoredBy {
    fn update_field(
        &mut self,
        tag: &Tag,
        doc: &Document,
    ) -> Result<(), Box<dyn std::error::Error>> {
        match (tag.name.local_part.as_str(), doc) {
            ("pen_name", Document::Content(Some(value))) => {
                self.pen_name = value.to_string();
                Ok(())
            }
            ("authors", Document::Nested(elements)) => {
                elements.iter().try_for_each(
                    |element| -> std::result::Result<(), Box<dyn std::error::Error>> {
                        if let Document::Element(_, inner_doc, _) = element {
                            let mut author_name = AuthorName::default();
                            if let Document::Nested(inner_elements) = inner_doc.as_ref() {
                                inner_elements.iter().try_for_each(
                                    |inner_element| -> Result<(), Box<dyn std::error::Error>> {
                                        if let Document::Element(tag, content, _) = inner_element {
                                            author_name.update_field(tag, content)?;
                                        }
                                        Ok(())
                                    },
                                )?;
                                self.authors.push(author_name);
                            } else {
                                return Err("Content is missing in Author authors".into());
                            }
                        }
                        Ok(())
                    },
                )?;
                Ok(())
            }
            _ => Err(format!("Unknown field in Author: {}", tag.name.local_part).into()),
        }
    }
}

#[derive(Debug, Default, Clone)]
struct AuthorName {
    first_name: String,
    last_name: String,
}
impl UpdateFields for AuthorName {
    fn update_field(
        &mut self,
        tag: &Tag,
        doc: &Document,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let field_name = &tag.name.local_part;

        if let Document::Content(Some(value)) = &doc {
            match field_name.as_str() {
                "first_name" => {
                    self.first_name = value.to_string();
                    Ok(())
                }
                "last_name" => {
                    self.last_name = value.to_string();
                    Ok(())
                }
                e => Err(format!("Unknown field in AuthorName: {}", e).into()),
            }
        } else {
            Err("Content is missing in AuthorName".into())
        }
    }
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;
    let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
    let mut book = Book::default();

    doc.iter_with_depth(0)
        .filter_map(|element| {
            if let Document::Element(tag, inner_doc, _) = element {
                Some((tag, inner_doc))
            } else {
                None
            }
        })
        .try_for_each(|(tag, inner_doc)| book.update_field(tag, inner_doc))?;

    println!("{book:#?}");
    Ok(())
}
examples/extract_information_manual_catalog.rs (line 99)
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
    fn update_field(
        &mut self,
        tag: &Tag,
        doc: &Document,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let field_name = &tag.name.local_part;

        if let Some(attributes) = &tag.attributes {
            for attr in attributes.iter() {
                if let Attribute::Instance {
                    name,
                    value: AttributeValue::Value(attr_val),
                } = attr
                {
                    if name.local_part == "isbn" {
                        self.isbn = Some(attr_val.into());
                    }
                }
            }
        }

        match &doc {
            Document::Content(Some(value)) => match field_name.as_str() {
                "title" => {
                    self.title = value.into();
                }
                "genre" => {
                    self.genre = value.into();
                }
                "type" => {
                    self.ty = value.to_string();
                }
                "series_number" => {
                    self.series_number = value.parse().unwrap_or_default();
                }
                "description" => {
                    self.description = value.into();
                }
                e => {
                    return Err(format!("Unknown field: {}", e).into());
                }
            },
            Document::Nested(_) => {
                doc.iter_with_depth(1).try_for_each(
                    |element| -> Result<(), Box<dyn std::error::Error>> {
                        if let Document::Element(tag, inner_doc, _) = element {
                            match tag.name.local_part.as_str() {
                                "authored_by" => {
                                    self.authored_by.update_fields(inner_doc)?;
                                }
                                _ => {
                                    self.update_field(tag, inner_doc)?;
                                }
                            }
                            Ok(())
                        } else {
                            Err(format!("Unknown field: {element:#?}").into())
                        }
                    },
                )?;
            }
            _ => {
                return Err("Content is missing".into());
            }
        }
        Ok(())
    }
}

#[derive(Debug, Default, Clone)]
struct AuthoredBy {
    pen_name: String,
    authors: Vec<AuthorName>,
}

impl UpdateFields for AuthoredBy {
    fn update_field(
        &mut self,
        tag: &Tag,
        doc: &Document,
    ) -> Result<(), Box<dyn std::error::Error>> {
        match (tag.name.local_part.as_str(), doc) {
            ("pen_name", Document::Content(Some(value))) => {
                self.pen_name = value.to_string();
                Ok(())
            }
            ("authors", Document::Nested(elements)) => {
                elements.iter().try_for_each(
                    |element| -> std::result::Result<(), Box<dyn std::error::Error>> {
                        if let Document::Element(_, inner_doc, _) = element {
                            let mut author_name = AuthorName::default();
                            if let Document::Nested(inner_elements) = inner_doc.as_ref() {
                                inner_elements.iter().try_for_each(
                                    |inner_element| -> Result<(), Box<dyn std::error::Error>> {
                                        if let Document::Element(tag, content, _) = inner_element {
                                            author_name.update_field(tag, content)?;
                                        }
                                        Ok(())
                                    },
                                )?;
                                self.authors.push(author_name);
                            } else {
                                return Err("Content is missing in Author authors".into());
                            }
                        }
                        Ok(())
                    },
                )?;
                Ok(())
            }
            _ => Err(format!("Unknown field in Author: {}", tag.name.local_part).into()),
        }
    }
}

#[derive(Debug, Default, Clone)]
struct AuthorName {
    first_name: String,
    last_name: String,
}
impl UpdateFields for AuthorName {
    fn update_field(
        &mut self,
        tag: &Tag,
        doc: &Document,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let field_name = &tag.name.local_part;

        if let Document::Content(Some(value)) = &doc {
            match field_name.as_str() {
                "first_name" => {
                    self.first_name = value.to_string();
                    Ok(())
                }
                "last_name" => {
                    self.last_name = value.to_string();
                    Ok(())
                }
                e => Err(format!("Unknown field in AuthorName: {}", e).into()),
            }
        } else {
            Err("Content is missing in AuthorName".into())
        }
    }
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut file = File::open("examples/TheExpanseSeries.xml")?;
    let data = read_file(&mut file)?;
    let (_, doc) = Document::parse_element_by_tag_name(&data, "catalog", &None)?;
    let mut books = Books::default();

    doc.iter_with_depth(0)
        .filter_map(|element| {
            if let Document::Element(tag, inner_doc, _) = element {
                Some((tag, inner_doc))
            } else {
                None
            }
        })
        .try_for_each(|(tag, inner_doc)| books.update_field(tag, inner_doc))
        .map_err(|e| {
            println!("Error updating field: {}", e);
            e
        })?;

    println!("{books:#?}");
    Ok(())
}

Trait Implementations§

source§

impl Clone for Document

source§

fn clone(&self) -> Document

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for Document

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl DynamicEquality for Document

source§

fn equals(&self, pattern: Pattern<'_>, method: ComparisonMethod) -> bool

source§

impl<'a> IntoIterator for &'a Document

source§

type Item = &'a Document

The type of the elements being iterated over.
source§

type IntoIter = DocumentIterator<'a>

Which kind of iterator are we turning this into?
source§

fn into_iter(self) -> Self::IntoIter

Creates an iterator from a value. Read more
source§

impl<'a> Parse<'a> for Document

source§

fn parse(input: &'a str, args: Self::Args) -> Self::Output

use nom_xml::{parse::Parse, config::Config, Document};

let xml = "<root><child>Content</child></root>";
let (_, doc) = Document::parse(xml, &Config::default()).unwrap();
println!("{doc:?}");
source§

type Args = &'a Config

source§

type Output = Result<(&'a str, Document), Err<Error>>

source§

fn is_char(c: char) -> bool

source§

fn parse_char(input: &str) -> IResult<&str, char>

source§

fn is_whitespace(c: char) -> bool

source§

fn parse_multispace1(input: &str) -> IResult<&str, ()>

source§

fn parse_multispace0(input: &str) -> IResult<&str, ()>

source§

fn is_name_start_char(c: char) -> bool

source§

fn is_name_char(c: char) -> bool

source§

fn parse_name_char(input: &str) -> IResult<&str, char>

source§

fn parse_name_start_char(input: &str) -> IResult<&str, char>

source§

fn parse_nmtoken(input: &str) -> IResult<&str, String>

source§

fn parse_nmtokens(input: &str) -> IResult<&str, Vec<String>>

source§

fn parse_name(input: &str) -> IResult<&str, Name>

source§

fn parse_names(input: &str) -> IResult<&str, Vec<Name>>

source§

fn parse_eq(input: &str) -> IResult<&str, ()>

source§

fn capture_span<O, F>( f: F, ) -> Box<dyn FnMut(&'a str) -> IResult<&'a str, (&'a str, O)> + 'a>
where F: FnMut(&'a str) -> IResult<&'a str, O> + 'a,

source§

impl<'a> ParseNamespace<'a> for Document

source§

impl PartialEq for Document

source§

fn eq(&self, other: &Document) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
source§

impl Eq for Document

source§

impl StructuralPartialEq for Document

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> CloneToUninit for T
where T: Clone,

source§

unsafe fn clone_to_uninit(&self, dst: *mut T)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T
where T: Clone,

source§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

source§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.