1use atom_syndication::Feed;
2use hyper::Client;
3use std::io::Read;
4use std::thread;
5
6use std::fs;
7use std::fs::{ File, create_dir };
8use std::io::prelude::*;
9use std::path::Path;
10use zip::ZipArchive;
11
/// URL of the Atom feed that `retrieve` downloads; each entry's first link is
/// treated as the address of a volume zip archive.
const BOUND_VOL_URL: &str = "http://api.data.parliament.uk/resources/files/feed?dataset=14";
/// Root directory under which all downloaded and extracted files are stored.
const BASE: &str = "./data";
/// Sub-directory of `BASE` holding the archives downloaded from the feed.
const VOL_ZIP_DIR: &str = "vol_zip";
/// Sub-directory of `BASE` holding extracted entries whose name ends in `xml`.
const XML_DIR: &str = "xml";
/// Sub-directory of `BASE` holding every other extracted entry (nested zips included).
const INNER_ZIP_DIR: &str = "inner_zip";
17
18fn get_save_zip(url: String) -> thread::JoinHandle<()> {
19 thread::spawn(move || {
20 let split_path = url.split("/").collect::<Vec<&str>>();
21 let file_name = split_path.last().unwrap();
22 let full_path = format!("{}/{}/{}", BASE, VOL_ZIP_DIR, file_name);
23
24 if Path::new(full_path.as_str()).exists() {
25 info!("Skipping: {}", full_path);
26 } else {
27 info!("Getting: {}", url);
28
29 let mut zip_buf = Vec::new();
30 if let Err(e) = Client::new()
31 .get(url.as_str())
32 .send().unwrap()
33 .read_to_end(&mut zip_buf) {
34
35 info!("Error: {:?}", e);
36 return;
37 }
38
39 info!("Saving: {}", file_name);
40
41 let mut file = File::create(full_path.clone()).unwrap();
42 file.write_all(zip_buf.as_slice()).unwrap();
43 }
44
45 let zip_file = File::open(full_path).unwrap();
46 process_zip(zip_file);
47 })
48}
49
50fn process_zip<T: Read + Seek>(zip_file: T) {
51 let mut zip = ZipArchive::new(zip_file).unwrap();
52
53 for i in 0..zip.len() {
54 let mut file = zip.by_index(i).unwrap();
55 let og_file_name = format!("{}", file.name());
56 let inner_split_path = og_file_name.split("/").collect::<Vec<&str>>();
57 let inner_file_name = inner_split_path.last().unwrap();
58 let folder = if inner_file_name.ends_with("xml") { XML_DIR } else { INNER_ZIP_DIR };
59 let inner_file_path = format!("{}/{}/{}", BASE, folder, inner_file_name);
60
61 if !inner_file_name.contains("html") &&
62 !inner_file_name.ends_with("pdf") &&
63 !inner_file_name.ends_with("htm") &&
64 !Path::new(inner_file_path.as_str()).exists() {
65
66 info!("Extracting: {}", file.name());
67
68 let mut zip_buf = Vec::new();
69 if let Err(e) = file.read_to_end(&mut zip_buf) {
70 info!("Error: {}", e);
71 }
72
73 info!("Saving: {}", inner_file_path);
74
75 let mut inner_file = File::create(inner_file_path.clone()).unwrap();
76 inner_file.write_all(zip_buf.as_slice()).unwrap();
77 } else {
78 info!("Skipping: {}", inner_file_name);
79 }
80
81 if inner_file_name.ends_with("zip") &&
82 !inner_file_name.contains("html") {
83 let inner_zip = File::open(inner_file_path).unwrap();
84 process_zip(inner_zip);
85 }
86 }
87}
88
89pub fn xml() -> Vec<String> {
91 retrieve();
92
93 fs::read_dir(format!("{}/{}", BASE, XML_DIR))
94 .unwrap()
95 .map(|ent| {
96 let mut xml_buf = String::new();
97 File::open(ent.unwrap().path()).unwrap().read_to_string(&mut xml_buf).unwrap_or(0usize);
98 xml_buf
99 }).collect::<Vec<String>>()
100}
101
102pub fn retrieve() {
104 let mut atom_str = String::new();
105
106 Client::new()
107 .get(BOUND_VOL_URL)
108 .send().unwrap()
109 .read_to_string(&mut atom_str).unwrap();
110
111 let feed = atom_str.parse::<Feed>().unwrap();
112
113 if let Err(e) = create_dir(BASE) {
114 info!("Create dir: {}", e);
115 }
116 if let Err(e) = create_dir(format!("{}/{}", BASE, VOL_ZIP_DIR)) {
117 info!("Create dir: {}", e);
118 }
119 if let Err(e) = create_dir(format!("{}/{}", BASE, XML_DIR)) {
120 info!("Create dir: {}", e);
121 }
122 if let Err(e) = create_dir(format!("{}/{}", BASE, INNER_ZIP_DIR)) {
123 info!("Create dir: {}", e);
124 }
125
126 let vol_urls = feed.entries.iter()
127 .map(|e| e.links.first().unwrap().href.clone())
128 .collect::<Vec<String>>();
129
130 let handles = vol_urls.iter()
131 .map(|url| get_save_zip(url.clone()))
132 .collect::<Vec<thread::JoinHandle<()>>>();
133
134 for h in handles {
135 if let Err(e) = h.join() {
136 info!("Error: {:?}", e);
137 }
138 }
139}