extern crate reqwest;
extern crate urls2disk;
use std::fs;
use std::path::Path;
use urls2disk::{wkhtmltopdf, ClientBuilder, Result, SimpleDocument, Url};
/// Fetches a fixed list of Apple 10-K filings from SEC EDGAR into `./data`.
///
/// Every URL is handed to the client twice: once with an `.html` target path
/// and the wkhtmltopdf flag off, and once with a `.pdf` target path and the
/// flag on. Files are named `Apple 10-K <year>.<ext>`, with years counted up
/// from 2010 in list order.
///
/// # Errors
///
/// Returns an error if the output directory cannot be created, a URL fails to
/// parse, the client cannot be built, or `get_documents` reports a failure.
fn run() -> Result<()> {
    // `create_dir_all` already succeeds when the directory exists, so no
    // separate `exists()` probe (and no check-then-create race) is needed.
    // It also surfaces the case where `./data` exists but is not a directory.
    let output_directory = Path::new("./data");
    fs::create_dir_all(output_directory)?;

    let base = "https://www.sec.gov/Archives/edgar/data/";
    let urls = [
        "320193/000119312510238044/d10k.htm",
        "320193/000119312511282113/d220209d10k.htm",
        "320193/000119312512444068/d411355d10k.htm",
        "320193/000119312513416534/d590790d10k.htm",
        "320193/000119312514383437/d783162d10k.htm",
        "320193/000119312515356351/d17062d10k.htm",
        "320193/000162828016020309/a201610-k9242016.htm",
        "320193/000032019317000070/a10-k20179302017.htm",
    ].iter()
        .map(|stem| format!("{}{}", base, stem))
        .collect::<Vec<String>>();

    // HTML documents first, then the PDF ones; the second batch is moved into
    // the first vector instead of cloning both into a third.
    let mut documents = build_documents(&urls, output_directory, "html", false)?;
    documents.extend(build_documents(&urls, output_directory, "pdf", true)?);

    let client = ClientBuilder::default()
        .set_max_requests_per_second(9)
        .set_max_threads_cpu(4)
        .set_max_threads_io(50)
        .set_reqwest_client(reqwest::Client::new())
        .set_wkhtmltopdf_setting(wkhtmltopdf::Setting::Zoom(3.5))
        .set_wkhtmltopdf_settings(vec![
            wkhtmltopdf::Setting::DisableExternalLinks(true),
            wkhtmltopdf::Setting::DisableJavascript(true),
        ])
        .build()?;

    client.get_documents(&mut documents)?;
    Ok(())
}

/// Builds one boxed `SimpleDocument` per URL, each targeting
/// `Apple 10-K <year>.<extension>` inside `output_directory`.
///
/// `wkhtmltopdf` is forwarded unchanged as the third argument of
/// `SimpleDocument::new`. Fails on the first URL that does not parse.
fn build_documents(
    urls: &[String],
    output_directory: &Path,
    extension: &str,
    wkhtmltopdf: bool,
) -> Result<Vec<Box<SimpleDocument>>> {
    // File names assume the URLs are listed in consecutive-year order,
    // beginning with this year.
    const FIRST_FILING_YEAR: usize = 2010;

    urls.iter()
        .enumerate()
        .map(|(offset, url_string)| {
            let filename = format!("Apple 10-K {}.{}", FIRST_FILING_YEAR + offset, extension);
            let path = output_directory.join(&filename);
            let url = url_string.parse::<Url>()?;
            Ok(Box::new(SimpleDocument::new(path, url, wkhtmltopdf)))
        })
        .collect::<Result<Vec<Box<SimpleDocument>>>>()
}
fn main() {
run().unwrap();
}