extract_information_manual/
extract_information_manual.rs

1use std::fs::File;
2
3use nom_xml::{
4    attribute::{Attribute, AttributeValue},
5    io::read_file,
6    tag::Tag,
7    Document, UpdateFields,
8};
9
10#[derive(Debug, Default)]
11struct Book {
12    isbn: String,
13    authored_by: AuthoredBy,
14    title: String,
15    genre: String,
16    ty: String,
17    series_number: u8,
18    description: String,
19}
20impl UpdateFields for Book {
21    fn update_field(
22        &mut self,
23        tag: &Tag,
24        doc: &Document,
25    ) -> Result<(), Box<dyn std::error::Error>> {
26        let field_name = &tag.name.local_part;
27
28        if let Some(attributes_vec) = &tag.attributes {
29            for attr in attributes_vec.iter() {
30                if let Attribute::Instance {
31                    name,
32                    value: AttributeValue::Value(attr_val),
33                } = attr
34                {
35                    if name.local_part == "isbn" {
36                        self.isbn = attr_val.to_string();
37                    }
38                }
39            }
40        }
41
42        match &doc {
43            Document::Content(Some(value)) => match field_name.as_str() {
44                "title" => {
45                    self.title = value.to_string();
46                }
47                "genre" => {
48                    self.genre = value.to_string();
49                }
50                "type" => {
51                    self.ty = value.to_string();
52                }
53                "series_number" => {
54                    self.series_number = value.parse().unwrap_or_default();
55                }
56                "description" => {
57                    self.description = value.to_string();
58                }
59                e => {
60                    return Err(format!("Unknown field2: {}", e).into());
61                }
62            },
63            Document::Nested(_) => {
64                for element in doc.iter_with_depth(1) {
65                    if let Document::Element(tag, inner_doc, _) = element {
66                        if "authored_by" == tag.name.local_part {
67                            self.authored_by.update_fields(inner_doc)?;
68                        } else {
69                            self.update_field(tag, inner_doc)?;
70                        }
71                    } else {
72                        return Err(format!("Unknown field: {element:#?}").into());
73                    }
74                }
75            }
76
77            _ => {
78                return Err("Content is missing".into());
79            }
80        }
81
82        Ok(())
83    }
84}
85
86#[derive(Debug, Default, Clone)]
87struct AuthoredBy {
88    pen_name: String,
89    authors: Vec<AuthorName>,
90}
91
92impl UpdateFields for AuthoredBy {
93    fn update_field(
94        &mut self,
95        tag: &Tag,
96        doc: &Document,
97    ) -> Result<(), Box<dyn std::error::Error>> {
98        match (tag.name.local_part.as_str(), doc) {
99            ("pen_name", Document::Content(Some(value))) => {
100                self.pen_name = value.to_string();
101                Ok(())
102            }
103            ("authors", Document::Nested(elements)) => {
104                elements.iter().try_for_each(
105                    |element| -> std::result::Result<(), Box<dyn std::error::Error>> {
106                        if let Document::Element(_, inner_doc, _) = element {
107                            let mut author_name = AuthorName::default();
108                            if let Document::Nested(inner_elements) = inner_doc.as_ref() {
109                                inner_elements.iter().try_for_each(
110                                    |inner_element| -> Result<(), Box<dyn std::error::Error>> {
111                                        if let Document::Element(tag, content, _) = inner_element {
112                                            author_name.update_field(tag, content)?;
113                                        }
114                                        Ok(())
115                                    },
116                                )?;
117                                self.authors.push(author_name);
118                            } else {
119                                return Err("Content is missing in Author authors".into());
120                            }
121                        }
122                        Ok(())
123                    },
124                )?;
125                Ok(())
126            }
127            _ => Err(format!("Unknown field in Author: {}", tag.name.local_part).into()),
128        }
129    }
130}
131
132#[derive(Debug, Default, Clone)]
133struct AuthorName {
134    first_name: String,
135    last_name: String,
136}
137impl UpdateFields for AuthorName {
138    fn update_field(
139        &mut self,
140        tag: &Tag,
141        doc: &Document,
142    ) -> Result<(), Box<dyn std::error::Error>> {
143        let field_name = &tag.name.local_part;
144
145        if let Document::Content(Some(value)) = &doc {
146            match field_name.as_str() {
147                "first_name" => {
148                    self.first_name = value.to_string();
149                    Ok(())
150                }
151                "last_name" => {
152                    self.last_name = value.to_string();
153                    Ok(())
154                }
155                e => Err(format!("Unknown field in AuthorName: {}", e).into()),
156            }
157        } else {
158            Err("Content is missing in AuthorName".into())
159        }
160    }
161}
162fn main() -> Result<(), Box<dyn std::error::Error>> {
163    let mut file = File::open("examples/TheExpanseSeries.xml")?;
164    let data = read_file(&mut file)?;
165    let (_, doc) = Document::parse_element_by_tag_name(&data, "book", &None)?;
166    let mut book = Book::default();
167
168    doc.iter_with_depth(0)
169        .filter_map(|element| {
170            if let Document::Element(tag, inner_doc, _) = element {
171                Some((tag, inner_doc))
172            } else {
173                None
174            }
175        })
176        .try_for_each(|(tag, inner_doc)| book.update_field(tag, inner_doc))?;
177
178    println!("{book:#?}");
179    Ok(())
180}