parse_blogger_backup_xml/
xml_tools.rs1use quick_xml::events::{BytesEnd, BytesStart, BytesText};
7use quick_xml::{events::Event, Reader};
8use std::borrow::Cow;
9use std::collections::HashSet;
10use std::fmt;
11
12pub fn string_from_bytes_text(bytes_text: BytesText) -> Result<String, Box<dyn std::error::Error>> {
13 let bytes = bytes_text.unescaped()?.into_owned();
14 Ok(String::from_utf8(bytes)?)
15}
16
17pub fn start_tag_string(bytes_start: &BytesStart) -> Result<String, Box<dyn std::error::Error>> {
18 let tag = bytes_start.name();
19 let tag = tag.to_owned();
20 let tag = String::from_utf8(tag)?;
21 Ok(tag)
22}
23pub fn end_tag_string(bytes_end: &BytesEnd) -> Result<String, Box<dyn std::error::Error>> {
24 let tag = bytes_end.name();
25 let tag = tag.to_owned();
26 let tag = String::from_utf8(tag)?;
27 Ok(tag)
28}
29
/// Converts a possibly-borrowed byte buffer into an owned `String`.
///
/// A borrowed buffer is copied; an owned buffer is reused as-is.
///
/// # Errors
///
/// Returns an error if the bytes are not valid UTF-8.
pub fn string_from_cow(cow: Cow<[u8]>) -> Result<String, Box<dyn std::error::Error>> {
    let bytes = cow.into_owned();
    Ok(String::from_utf8(bytes)?)
}
37
/// Private newtype wrapper around an owned `String`.
struct Stringable(String);

impl fmt::Display for Stringable {
    /// Writes the wrapped string unchanged.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(inner) = self;
        f.write_str(inner)
    }
}
45
46impl From<&BytesStart<'_>> for Stringable {
47 fn from(start: &BytesStart) -> Self {
48 let tag = start.name();
49 let tag = tag.to_owned();
50 let tag = String::from_utf8(tag).expect("Tag not in utf8");
51 Self(tag)
52 }
53}
54
/// Stack of tag names describing a position inside an XML document,
/// outermost element first.
pub struct XPath(Vec<String>);

impl XPath {
    /// Creates an empty path.
    pub fn new() -> Self {
        Self(Vec::new())
    }

    /// Appends `tag` as the innermost element of the path.
    pub fn push(&mut self, tag: String) {
        let Self(segments) = self;
        segments.push(tag);
    }

    /// Removes and returns the innermost tag, or `None` if the path is empty.
    pub fn pop(&mut self) -> Option<String> {
        let Self(segments) = self;
        segments.pop()
    }

    /// Removes the innermost tag and asserts that it equals `tag`.
    ///
    /// # Panics
    ///
    /// Panics if the path is empty or if the removed tag differs from `tag`.
    pub fn pop_checked(&mut self, tag: String) {
        let innermost = self.pop().expect("can't end without starting.");
        assert_eq!(innermost, tag);
    }

    /// Renders the path as its tags joined by `=>`.
    pub fn as_string(&self) -> String {
        let Self(segments) = self;
        segments.join("=>")
    }
}

impl Default for XPath {
    /// Same as [`XPath::new`]: an empty path.
    fn default() -> Self {
        XPath::new()
    }
}

impl fmt::Debug for XPath {
    /// Writes the same text as [`XPath::as_string`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.as_string();
        f.write_str(&rendered)
    }
}

impl fmt::Display for XPath {
    /// Writes the same text as [`XPath::as_string`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.as_string();
        f.write_str(&rendered)
    }
}
92
93pub fn tag_names(path: &str) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
95 let mut reader = Reader::from_file(path)?;
96 let mut buf = Vec::new();
97 let mut tag_names: HashSet<String> = HashSet::new();
98 loop {
99 match reader.read_event(&mut buf) {
100 Ok(Event::Start(ref e)) => {
101 let tag = start_tag_string(e)?;
102 tag_names.insert(tag);
103 }
104 Ok(Event::Eof) => break,
105 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
106 _ => (),
107 }
108 }
109 Ok(tag_names)
110}
111
112pub fn all_text(path: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
114 let mut reader = Reader::from_file(path)?;
115 let mut buf = Vec::new();
116 let mut txt = Vec::new();
117 loop {
118 match reader.read_event(&mut buf) {
119 Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap()),
120 Ok(Event::Eof) => break,
121 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
122 _ => (),
123 }
124 }
125 Ok(txt)
126}
127
128pub fn paths(path: &str) -> Result<(), Box<dyn std::error::Error>> {
130 let mut reader = Reader::from_file(path)?;
131 let mut xpath: XPath = XPath::new();
132 let mut buf = Vec::new();
133 let mut xpath_strings = HashSet::new();
134 loop {
135 match reader.read_event(&mut buf) {
136 Ok(Event::Start(ref e)) => {
137 xpath.push(start_tag_string(e)?);
138 }
139 Ok(Event::End(ref e)) => {
140 xpath.pop_checked(end_tag_string(e)?);
141 }
142 Ok(Event::Eof) => break,
143 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
144 _ => (),
145 }
146 xpath_strings.insert(xpath.as_string());
147 }
148 let mut xpath_strings: Vec<String> = xpath_strings.into_iter().collect();
149 xpath_strings.sort();
150 for xpath_string in xpath_strings {
151 println!("{}", xpath_string);
152 }
153 Ok(())
154}
155
156pub fn all_attributes(file_path: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
158 let mut reader = Reader::from_file(file_path)?;
159 let mut xpath = XPath::new();
160 let mut buf = Vec::new();
161 let mut attributes = HashSet::new();
162 loop {
163 match reader.read_event(&mut buf) {
164 Ok(Event::Start(ref e)) => {
165 xpath.push(start_tag_string(e)?);
166 for attr in e.attributes() {
167 let attr_string = format!("{:?}", attr.unwrap());
168 attributes.insert(attr_string);
169 }
170 }
171 Ok(Event::End(ref e)) => xpath.pop_checked(end_tag_string(e)?),
172 Ok(Event::Eof) => break,
173 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
174 Ok(_event) => {}
175 }
176 }
177 let mut attributes: Vec<String> = attributes.into_iter().collect();
178 attributes.sort();
179 for attr in &attributes {
180 println!("{}", attr);
181 }
182 Ok(attributes)
183}
184
185pub fn path_contents(
187 file_path: &str,
188 x_path: &str,
189 first: u32,
190 last: u32,
191) -> Result<(), Box<dyn std::error::Error>> {
192 let mut reader = Reader::from_file(file_path)?;
193 let mut xpath = XPath::new();
194 let mut xpath_string = "".to_owned();
195 let mut buf = Vec::new();
196 let mut index = 0;
197 loop {
198 match reader.read_event(&mut buf) {
199 Ok(Event::Start(ref e)) => {
200 xpath.push(start_tag_string(e)?);
201 xpath_string = xpath.as_string();
202 }
203 Ok(Event::End(ref e)) => {
204 if x_path == xpath_string {
205 index += 1;
206 if first <= index && index <= last {
207 println!()
208 };
209 }
210 xpath.pop_checked(end_tag_string(e)?);
211 xpath_string = xpath.as_string();
212 }
213 Ok(Event::Eof) => break,
214 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
215 Ok(event) => {
216 if x_path == xpath_string && first <= index && index <= last {
217 println!("{:?}", event);
218 }
219 }
220 }
221 }
222 Ok(())
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    /// Backup file read by every test in this module.
    const BACKUP_XML: &str = "data/harris_backup.xml";

    /// Result type shared by the tests below.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Prints the contents of the draft-flag path for occurrences 0 through 102.
    #[test]
    fn run_path_contents() -> TestResult {
        let entry_number = 102;
        path_contents(
            BACKUP_XML,
            "feed=>entry=>app:control=>app:draft",
            0,
            entry_number,
        )
    }

    /// Runs `all_attributes` over the backup and discards the returned list.
    #[test]
    fn run_all_attributes() -> TestResult {
        all_attributes(BACKUP_XML).map(|_| ())
    }

    /// Runs `paths` over the backup.
    #[test]
    fn run_paths() -> TestResult {
        paths(BACKUP_XML)
    }

    /// Dumps the set of tag names found in the backup.
    #[test]
    fn print_tag_names() -> TestResult {
        let tags = tag_names(BACKUP_XML)?;
        dbg!(tags);
        Ok(())
    }

    /// Dumps every text node found in the backup.
    #[test]
    fn print_all_text() -> TestResult {
        let tags = all_text(BACKUP_XML)?;
        dbg!(tags);
        Ok(())
    }
}