parse_blogger_backup_xml/
parse_backup.rs1use std::collections::HashMap;
30use std::str::FromStr;
31
32use quick_xml::events::Event;
33use quick_xml::Reader;
34
35use crate::models::Entry;
36use crate::models::EntryKind;
37use crate::models::Post;
38use crate::xml_tools::end_tag_string;
39use crate::xml_tools::start_tag_string;
40use crate::xml_tools::string_from_bytes_text;
41use crate::xml_tools::string_from_cow;
42use crate::xml_tools::XPath;
43
/// Attribute value that marks an entry as a blog post.
const POST_KIND: &[u8] = b"http://schemas.google.com/blogger/2008/kind#post";
/// Attribute value that marks an entry as a blog settings record.
const SETTINGS_KIND: &[u8] = b"http://schemas.google.com/blogger/2008/kind#settings";
/// Attribute value that marks an entry as a blog template.
const TEMPLATE_KIND: &[u8] = b"http://schemas.google.com/blogger/2008/kind#template";
/// Prefix of an attribute value that carries a post id; `get_posts` treats an
/// entry holding such an attribute as a comment on that post.
const POST_ID_PREFIX: &[u8] = b"tag:blogger.com,1999:blog";
49
50pub fn get_posts(file_path: &str) -> Result<Vec<Post>, Box<dyn std::error::Error>> {
55 let mut buf = Vec::new();
56 let mut comments = Vec::new();
57 let mut entry = Entry::new();
58 let mut posts = HashMap::new();
59 let mut reader = Reader::from_file(file_path)?;
60 let mut xpath = XPath::new();
61 loop {
62 match reader.read_event(&mut buf) {
63 Ok(Event::Start(ref bytes_start)) => {
64 xpath.push(start_tag_string(bytes_start)?);
65 }
66 Ok(Event::End(ref bytes_end)) => {
67 if xpath.as_string() == "feed=>entry" {
68 match entry.kind {
69 Some(EntryKind::Comment) => comments.push(entry.to_comment().unwrap()),
70 Some(EntryKind::Post) => {
71 let post = entry.to_post().unwrap();
72 posts.insert(post.id.to_owned(), post);
73 }
74 _ => (),
75 }
76 entry.clear();
77 }
78 xpath.pop_checked(end_tag_string(bytes_end)?);
79 }
80 Ok(Event::Empty(byte_start)) => {
81 for attribute in byte_start.attributes().flatten() {
82 match attribute.value {
83 value if value == POST_KIND => entry.kind = Some(EntryKind::Post),
84 value if value == SETTINGS_KIND => entry.kind = Some(EntryKind::Settings),
85 value if value == TEMPLATE_KIND => entry.kind = Some(EntryKind::Template),
86 value if value.starts_with(POST_ID_PREFIX) => {
87 entry.kind = Some(EntryKind::Comment);
88 entry.post_id = Some(string_from_cow(value)?);
89 }
90 _value => {
91 }
94 }
95 }
96 }
97 Ok(Event::Text(bytes_text)) => {
98 let text = Some(string_from_bytes_text(bytes_text)?);
99 match xpath.as_string().as_str() {
100 "feed=>entry=>author=>name" => entry.author_name = text,
101 "feed=>entry=>published" => {
102 let text = text.unwrap();
103 let published = parse_published(&text)?;
104 entry.published = Some(published);
105 }
106 "feed=>entry=>id" => entry.id = text,
107 "feed=>entry=>title" => entry.title = text,
108 "feed=>entry=>content" => entry.content = text,
109 "feed=>entry=>app:control=>app:draft" => {
110 if text.unwrap() == "yes" {
111 entry.draft = true;
112 println!("This post is a draft")
113 }
114 }
115 "feed=>entry" => println!("{}", text.unwrap()),
116 _ => (),
117 }
118 }
119 Ok(Event::Eof) => break,
120 Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
121 Ok(_event) => {}
122 }
123 }
124 for comment in comments {
125 if let Some(post) = posts.get_mut(&comment.post_id) {
126 post.comments.push(comment);
127 } else {
128 println!("missing post for comment {:?}", comment);
129 }
130 }
131 let mut posts: Vec<Post> = posts.into_iter().map(|(_, post)| post).collect();
132 posts.sort_by(|a, b| a.published.cmp(&b.published));
133 Ok(posts)
134}
135
136pub fn parse_published(
137 published: &str,
138) -> Result<chrono::DateTime<chrono::FixedOffset>, Box<dyn std::error::Error>> {
139 let dt = chrono::DateTime::from_str(published)?;
140 Ok(dt)
141}
142
#[cfg(test)]
mod tests {
    use super::get_posts;

    /// Smoke test: the backup fixture must parse without error. The parsed
    /// posts are dumped for manual inspection; nothing else is asserted.
    #[test]
    fn test_get_posts() {
        let backup_path = "data/backup.xml";
        let posts = get_posts(backup_path).unwrap();
        dbg!(posts);
    }
}