/// Top-level node type for a parsed XML document.
///
/// Each variant models one kind of item that can appear in a document.
/// `Element` stores its content as a boxed `Document`, which makes the type
/// recursive and lets nested tags be represented as a tree.
pub enum Document {
/// The document prolog. Every part is optional: an `XmlDecl`, a list of
/// `Misc` items, and a `DocType`.
Prolog {
xml_decl: Option<XmlDecl>,
misc: Option<Vec<Misc>>,
doc_type: Option<DocType>,
},
/// An element with content: a `Tag`, the inner `Document`, and a second `Tag`.
/// NOTE(review): presumably the first tag is the start tag and the last is
/// the end tag — confirm against the parser.
Element(Tag, Box<Document>, Tag),
/// Text content; `None` when no text is present.
Content(Option<String>),
/// A sequence of sibling `Document` nodes.
Nested(Vec<Document>),
/// NOTE(review): presumably marks the absence of any content — confirm.
Empty,
/// A tag with no content.
/// NOTE(review): presumably a self-closing tag such as `<tag/>` — confirm.
EmptyTag(Tag),
/// A processing instruction node.
ProcessingInstruction(ProcessingInstruction),
/// A comment node holding the comment text.
Comment(String),
/// A CDATA node holding the section text.
CDATA(String),
}
Expand description
Main entry point for parsing XML documents
This enum encapsulates all of the top-level types that comprise an XML document. The core variant is `Element(Tag, Box<Document>, Tag)`,
which allows recursive parsing of nested tags and their content.
Variants§
Prolog
Element(Tag, Box<Document>, Tag)
Content(Option<String>)
Nested(Vec<Document>)
Empty
EmptyTag(Tag)
ProcessingInstruction(ProcessingInstruction)
Comment(String)
CDATA(String)
Implementations§
Source§impl Document
impl Document
pub fn parse_prolog<'a>( input: &'a str, entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>, config: &'a Config, ) -> IResult<&'a str, (Option<Document>, Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>)>
Sourcepub fn parse_element_by_tag_name<'a>(
input: &'a str,
tag_name: &'a str,
attributes: &Option<Vec<Attribute>>,
) -> IResult<&'a str, Document>
pub fn parse_element_by_tag_name<'a>( input: &'a str, tag_name: &'a str, attributes: &Option<Vec<Attribute>>, ) -> IResult<&'a str, Document>
The main interface for parsing the first element that matches the given tag name and, optionally, the given attributes.
See the `parse_first_matching_element` example for more information.
Run it with `cargo run --example parse_first_matching_element`.
Also see the `parse_element_with_specific_attribute_value` example.
Run it with `cargo run --example parse_element_with_specific_attribute_value`.
Examples found in repository?
More examples
162fn main() -> Result<(), Box<dyn std::error::Error>> {
163 let mut file = File::open("examples/TheExpanseSeries.xml")?;
164 let data = read_file(&mut file)?;
165 let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
166 let mut book = Book::default();
167
168 doc.iter_with_depth(0)
169 .filter_map(|element| {
170 if let Document::Element(tag, inner_doc, _) = element {
171 Some((tag, inner_doc))
172 } else {
173 None
174 }
175 })
176 .try_for_each(|(tag, inner_doc)| book.update_field(tag, inner_doc))?;
177
178 println!("{book:#?}");
179 Ok(())
180}
37fn main() -> Result<(), Box<dyn std::error::Error>> {
38 let mut file = File::open("examples/TheExpanseSeries.xml")?;
39 let data = read_file(&mut file)?;
40 let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
41 let mut book = Book::default();
42 // doc.iter_with_depth(0)
43 // .filter_map(|record| {
44 // if let Document::Element(tag, inner_doc, _) = record {
45 // Some((tag, inner_doc))
46 // } else {
47 // None
48 // }
49 // })
50 // .try_for_each(|(tag, inner_doc)| book.update_field(tag, inner_doc))?;
51 // book.update_attribute_fields(&doc);
52 book.update_fields(&doc)?;
53 println!("{book:#?}");
54 Ok(())
55}
43fn main() -> Result<(), Box<dyn std::error::Error>> {
44 let mut file = File::open("examples/TheExpanseSeries.xml")?;
45 let data = read_file(&mut file)?;
46 let (_, doc) = Document::parse_element_by_tag_name(&data, "catalog", &None)?;
47 let mut books = Books::default();
48
49 doc.iter_with_depth(0)
50 .filter_map(|element| {
51 if let Document::Element(tag, inner_doc, _) = element {
52 Some((tag, inner_doc))
53 } else {
54 None
55 }
56 })
57 .try_for_each(|(tag, inner_doc)| books.update_field(tag, inner_doc))
58 .map_err(|e| {
59 println!("Error updating field: {}", e);
60 e
61 })?;
62
63 println!("{books:#?}");
64 Ok(())
65}
202fn main() -> Result<(), Box<dyn std::error::Error>> {
203 let mut file = File::open("examples/TheExpanseSeries.xml")?;
204 let data = read_file(&mut file)?;
205 let (_, doc) = Document::parse_element_by_tag_name(&data, "catalog", &None)?;
206 let mut books = Books::default();
207
208 doc.iter_with_depth(0)
209 .filter_map(|element| {
210 if let Document::Element(tag, inner_doc, _) = element {
211 Some((tag, inner_doc))
212 } else {
213 None
214 }
215 })
216 .try_for_each(|(tag, inner_doc)| books.update_field(tag, inner_doc))
217 .map_err(|e| {
218 println!("Error updating field: {}", e);
219 e
220 })?;
221
222 println!("{books:#?}");
223 Ok(())
224}
Sourcepub fn parse_elements_by_tag_name<'a>(
input: &'a str,
tag_name: &'a str,
attributes: &Option<Vec<Attribute>>,
) -> IResult<&'a str, Vec<Document>>
pub fn parse_elements_by_tag_name<'a>( input: &'a str, tag_name: &'a str, attributes: &Option<Vec<Attribute>>, ) -> IResult<&'a str, Vec<Document>>
The main interface for parsing every element that has the same tag name.
See the `parse_all_of_specific_tag` example for more information.
Run it with `cargo run --example parse_all_of_specific_tag`.
Source§impl Document
impl Document
Sourcepub fn iter_with_depth(&self, max_level: usize) -> DocumentIterator<'_> ⓘ
pub fn iter_with_depth(&self, max_level: usize) -> DocumentIterator<'_> ⓘ
The main interface for extracting content from the `Document` tree.
See the `extract_information_manual` example for more information.
Examples found in repository?
43fn main() -> Result<(), Box<dyn std::error::Error>> {
44 let mut file = File::open("examples/TheExpanseSeries.xml")?;
45 let data = read_file(&mut file)?;
46 let (_, doc) = Document::parse_element_by_tag_name(&data, "catalog", &None)?;
47 let mut books = Books::default();
48
49 doc.iter_with_depth(0)
50 .filter_map(|element| {
51 if let Document::Element(tag, inner_doc, _) = element {
52 Some((tag, inner_doc))
53 } else {
54 None
55 }
56 })
57 .try_for_each(|(tag, inner_doc)| books.update_field(tag, inner_doc))
58 .map_err(|e| {
59 println!("Error updating field: {}", e);
60 e
61 })?;
62
63 println!("{books:#?}");
64 Ok(())
65}
More examples
21 fn update_field(
22 &mut self,
23 tag: &Tag,
24 doc: &Document,
25 ) -> Result<(), Box<dyn std::error::Error>> {
26 let field_name = &tag.name.local_part;
27
28 if let Some(attributes_vec) = &tag.attributes {
29 for attr in attributes_vec.iter() {
30 if let Attribute::Instance {
31 name,
32 value: AttributeValue::Value(attr_val),
33 } = attr
34 {
35 if name.local_part == "isbn" {
36 self.isbn = attr_val.to_string();
37 }
38 }
39 }
40 }
41
42 match &doc {
43 Document::Content(Some(value)) => match field_name.as_str() {
44 "title" => {
45 self.title = value.to_string();
46 }
47 "genre" => {
48 self.genre = value.to_string();
49 }
50 "type" => {
51 self.ty = value.to_string();
52 }
53 "series_number" => {
54 self.series_number = value.parse().unwrap_or_default();
55 }
56 "description" => {
57 self.description = value.to_string();
58 }
59 e => {
60 return Err(format!("Unknown field2: {}", e).into());
61 }
62 },
63 Document::Nested(_) => {
64 for element in doc.iter_with_depth(1) {
65 if let Document::Element(tag, inner_doc, _) = element {
66 if "authored_by" == tag.name.local_part {
67 self.authored_by.update_fields(inner_doc)?;
68 } else {
69 self.update_field(tag, inner_doc)?;
70 }
71 } else {
72 return Err(format!("Unknown field: {element:#?}").into());
73 }
74 }
75 }
76
77 _ => {
78 return Err("Content is missing".into());
79 }
80 }
81
82 Ok(())
83 }
84}
85
86#[derive(Debug, Default, Clone)]
87struct AuthoredBy {
88 pen_name: String,
89 authors: Vec<AuthorName>,
90}
91
92impl UpdateFields for AuthoredBy {
93 fn update_field(
94 &mut self,
95 tag: &Tag,
96 doc: &Document,
97 ) -> Result<(), Box<dyn std::error::Error>> {
98 match (tag.name.local_part.as_str(), doc) {
99 ("pen_name", Document::Content(Some(value))) => {
100 self.pen_name = value.to_string();
101 Ok(())
102 }
103 ("authors", Document::Nested(elements)) => {
104 elements.iter().try_for_each(
105 |element| -> std::result::Result<(), Box<dyn std::error::Error>> {
106 if let Document::Element(_, inner_doc, _) = element {
107 let mut author_name = AuthorName::default();
108 if let Document::Nested(inner_elements) = inner_doc.as_ref() {
109 inner_elements.iter().try_for_each(
110 |inner_element| -> Result<(), Box<dyn std::error::Error>> {
111 if let Document::Element(tag, content, _) = inner_element {
112 author_name.update_field(tag, content)?;
113 }
114 Ok(())
115 },
116 )?;
117 self.authors.push(author_name);
118 } else {
119 return Err("Content is missing in Author authors".into());
120 }
121 }
122 Ok(())
123 },
124 )?;
125 Ok(())
126 }
127 _ => Err(format!("Unknown field in Author: {}", tag.name.local_part).into()),
128 }
129 }
130}
131
132#[derive(Debug, Default, Clone)]
133struct AuthorName {
134 first_name: String,
135 last_name: String,
136}
137impl UpdateFields for AuthorName {
138 fn update_field(
139 &mut self,
140 tag: &Tag,
141 doc: &Document,
142 ) -> Result<(), Box<dyn std::error::Error>> {
143 let field_name = &tag.name.local_part;
144
145 if let Document::Content(Some(value)) = &doc {
146 match field_name.as_str() {
147 "first_name" => {
148 self.first_name = value.to_string();
149 Ok(())
150 }
151 "last_name" => {
152 self.last_name = value.to_string();
153 Ok(())
154 }
155 e => Err(format!("Unknown field in AuthorName: {}", e).into()),
156 }
157 } else {
158 Err("Content is missing in AuthorName".into())
159 }
160 }
161}
162fn main() -> Result<(), Box<dyn std::error::Error>> {
163 let mut file = File::open("examples/TheExpanseSeries.xml")?;
164 let data = read_file(&mut file)?;
165 let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
166 let mut book = Book::default();
167
168 doc.iter_with_depth(0)
169 .filter_map(|element| {
170 if let Document::Element(tag, inner_doc, _) = element {
171 Some((tag, inner_doc))
172 } else {
173 None
174 }
175 })
176 .try_for_each(|(tag, inner_doc)| book.update_field(tag, inner_doc))?;
177
178 println!("{book:#?}");
179 Ok(())
180}
56 fn update_field(
57 &mut self,
58 tag: &Tag,
59 doc: &Document,
60 ) -> Result<(), Box<dyn std::error::Error>> {
61 let field_name = &tag.name.local_part;
62
63 if let Some(attributes) = &tag.attributes {
64 for attr in attributes.iter() {
65 if let Attribute::Instance {
66 name,
67 value: AttributeValue::Value(attr_val),
68 } = attr
69 {
70 if name.local_part == "isbn" {
71 self.isbn = Some(attr_val.into());
72 }
73 }
74 }
75 }
76
77 match &doc {
78 Document::Content(Some(value)) => match field_name.as_str() {
79 "title" => {
80 self.title = value.into();
81 }
82 "genre" => {
83 self.genre = value.into();
84 }
85 "type" => {
86 self.ty = value.to_string();
87 }
88 "series_number" => {
89 self.series_number = value.parse().unwrap_or_default();
90 }
91 "description" => {
92 self.description = value.into();
93 }
94 e => {
95 return Err(format!("Unknown field: {}", e).into());
96 }
97 },
98 Document::Nested(_) => {
99 doc.iter_with_depth(1).try_for_each(
100 |element| -> Result<(), Box<dyn std::error::Error>> {
101 if let Document::Element(tag, inner_doc, _) = element {
102 match tag.name.local_part.as_str() {
103 "authored_by" => {
104 self.authored_by.update_fields(inner_doc)?;
105 }
106 _ => {
107 self.update_field(tag, inner_doc)?;
108 }
109 }
110 Ok(())
111 } else {
112 Err(format!("Unknown field: {element:#?}").into())
113 }
114 },
115 )?;
116 }
117 _ => {
118 return Err("Content is missing".into());
119 }
120 }
121 Ok(())
122 }
123}
124
125#[derive(Debug, Default, Clone)]
126struct AuthoredBy {
127 pen_name: String,
128 authors: Vec<AuthorName>,
129}
130
131impl UpdateFields for AuthoredBy {
132 fn update_field(
133 &mut self,
134 tag: &Tag,
135 doc: &Document,
136 ) -> Result<(), Box<dyn std::error::Error>> {
137 match (tag.name.local_part.as_str(), doc) {
138 ("pen_name", Document::Content(Some(value))) => {
139 self.pen_name = value.to_string();
140 Ok(())
141 }
142 ("authors", Document::Nested(elements)) => {
143 elements.iter().try_for_each(
144 |element| -> std::result::Result<(), Box<dyn std::error::Error>> {
145 if let Document::Element(_, inner_doc, _) = element {
146 let mut author_name = AuthorName::default();
147 if let Document::Nested(inner_elements) = inner_doc.as_ref() {
148 inner_elements.iter().try_for_each(
149 |inner_element| -> Result<(), Box<dyn std::error::Error>> {
150 if let Document::Element(tag, content, _) = inner_element {
151 author_name.update_field(tag, content)?;
152 }
153 Ok(())
154 },
155 )?;
156 self.authors.push(author_name);
157 } else {
158 return Err("Content is missing in Author authors".into());
159 }
160 }
161 Ok(())
162 },
163 )?;
164 Ok(())
165 }
166 _ => Err(format!("Unknown field in Author: {}", tag.name.local_part).into()),
167 }
168 }
169}
170
171#[derive(Debug, Default, Clone)]
172struct AuthorName {
173 first_name: String,
174 last_name: String,
175}
176impl UpdateFields for AuthorName {
177 fn update_field(
178 &mut self,
179 tag: &Tag,
180 doc: &Document,
181 ) -> Result<(), Box<dyn std::error::Error>> {
182 let field_name = &tag.name.local_part;
183
184 if let Document::Content(Some(value)) = &doc {
185 match field_name.as_str() {
186 "first_name" => {
187 self.first_name = value.to_string();
188 Ok(())
189 }
190 "last_name" => {
191 self.last_name = value.to_string();
192 Ok(())
193 }
194 e => Err(format!("Unknown field in AuthorName: {}", e).into()),
195 }
196 } else {
197 Err("Content is missing in AuthorName".into())
198 }
199 }
200}
201
202fn main() -> Result<(), Box<dyn std::error::Error>> {
203 let mut file = File::open("examples/TheExpanseSeries.xml")?;
204 let data = read_file(&mut file)?;
205 let (_, doc) = Document::parse_element_by_tag_name(&data, "catalog", &None)?;
206 let mut books = Books::default();
207
208 doc.iter_with_depth(0)
209 .filter_map(|element| {
210 if let Document::Element(tag, inner_doc, _) = element {
211 Some((tag, inner_doc))
212 } else {
213 None
214 }
215 })
216 .try_for_each(|(tag, inner_doc)| books.update_field(tag, inner_doc))
217 .map_err(|e| {
218 println!("Error updating field: {}", e);
219 e
220 })?;
221
222 println!("{books:#?}");
223 Ok(())
224}
Trait Implementations§
Source§impl DynamicEquality for Document
impl DynamicEquality for Document
Source§impl<'a> IntoIterator for &'a Document
impl<'a> IntoIterator for &'a Document
Source§impl<'a> Parse<'a> for Document
impl<'a> Parse<'a> for Document
Source§fn parse(input: &'a str, args: Self::Args) -> Self::Output
fn parse(input: &'a str, args: Self::Args) -> Self::Output
use nom_xml::{parse::Parse, config::Config, Document};
let xml = "<root><child>Content</child></root>";
let (_, doc) = Document::parse(xml, &Config::default()).unwrap();
println!("{doc:?}");