1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
use std::path::PathBuf;
use std::fs::File;
use std::fs;
use std::io;
use env_logger;
use log::*;
use unzip::Unzipper;
use walkdir::WalkDir;
/// Describes a downloadable sample archive.
///
/// All fields are `'static` string slices, so the type is cheap to copy and can be
/// declared as a `const`.
#[derive(Debug, Clone, Copy)]
pub struct DataSource {
    /// Short identifier; used as the unpack directory name and as the base name of the
    /// downloaded `.zip` archive.
    pub name: &'static str,
    /// URL the zip archive is downloaded from.
    pub url: &'static str,
}
/// Location and basic statistics of a sample data set that has been unpacked on disk.
#[derive(Debug)]
pub struct SampleData {
    /// Directory reported as the root of the unpacked data.
    pub root: PathBuf,
    /// Number of entries counted while walking the unpacked tree.
    pub num_files: u64,
    /// Sum of the metadata lengths of the counted entries, in bytes.
    pub size: u64,
    /// Path of the zip archive the data was unpacked from.
    pub archive: PathBuf,
}

impl SampleData {
    /// Deletes the unpacked directory tree and the archive file.
    ///
    /// Both deletions are always attempted, so a failure to remove the directory
    /// (for example because it is already gone) does not leave the archive behind.
    ///
    /// # Errors
    ///
    /// Returns the error from removing the directory if that failed, otherwise the
    /// error from removing the archive, if any.
    pub fn remove(&self) -> io::Result<()> {
        let dir_outcome = fs::remove_dir_all(&self.root);
        let archive_outcome = fs::remove_file(&self.archive);
        // `and` keeps the first error while still having run both removals above.
        dir_outcome.and(archive_outcome)
    }
}
/// Sample data source: the Linux kernel source tree at tag `v5.9`, fetched as a GitHub
/// zip archive.
const KERNEL: DataSource = DataSource {
    name: "Linux_Kernel",
    url: "https://github.com/torvalds/linux/archive/v5.9.zip",
};
/// Sample data source: the Cargo source tree at tag `0.47.0`, fetched as a GitHub zip
/// archive.
const CARGO: DataSource = DataSource {
    name: "Cargo_sources",
    url: "https://github.com/rust-lang/cargo/archive/0.47.0.zip",
};
fn download_and_unpack(ds: DataSource) -> Result<SampleData, String> {
std::fs::create_dir_all(ds.name);
let archive = format!("{}.zip", ds.name);
let mut num_files = 0;
let mut size = 0;
if !std::path::Path::new(&archive).is_file() {
info!("Downloading {:?}", ds.url);
let mut resp = reqwest::blocking::get(ds.url).map_err(|e| format!("{:?}", e))?;
let mut out = File::create(&archive).map_err(|e| format!("{:?}", e))?;
std::io::copy(&mut resp, &mut out).map_err(|e| format!("{:?}", e))?;
} else {
info!("Did not download, archive already present");
}
info!("Unzipping...");
Unzipper::new(File::open(&archive).unwrap(), ds.name).unzip().map_err(|e| format!("{:?}", e))?;
info!("Sample data ready. Gathering stats...");
for entry in WalkDir::new(ds.name).into_iter().filter_map(|e| e.ok()) {
println!("{}", entry.path().display());
num_files +=1;
if let Ok(meta) = entry.metadata() {
size += meta.len();
}
}
data_path(ds.name).ok_or("Could not get data dir".to_string()).map(|x| SampleData {
root: x,
archive: std::path::PathBuf::from(&archive),
num_files,
size
})
}
fn setup() {
std::env::set_var("RUST_LOG", "INFO");
let _ = env_logger::builder().try_init();
}
/// Builds the path of `data_dir` placed two levels above this source file.
///
/// The starting point is the compile-time `file!()` path of this file. Returns `None`
/// when that path has no parent or no grandparent component.
fn data_path(data_dir: &str) -> Option<PathBuf> {
    let this_file = std::path::Path::new(file!());
    let containing_dir = this_file.parent()?;
    let base = containing_dir.parent()?;
    Some(base.join(data_dir))
}
/// Prepares the Linux kernel sample data set.
///
/// Initialises logging, then downloads (if needed) and unpacks the [`KERNEL`] source.
///
/// # Errors
///
/// Forwards the error message produced by [`download_and_unpack`].
pub fn linux_kernel() -> Result<SampleData, String> {
    setup();
    let source = KERNEL;
    download_and_unpack(source)
}
/// Prepares the Cargo sources sample data set.
///
/// Initialises logging, then downloads (if needed) and unpacks the [`CARGO`] source.
///
/// # Errors
///
/// Forwards the error message produced by [`download_and_unpack`].
pub fn cargo_sources() -> Result<SampleData, String> {
    setup();
    let source = CARGO;
    download_and_unpack(source)
}
/// Smoke test: prepares the (comparatively small) Cargo sample data set and checks that
/// preparation succeeded. Requires network access unless the archive is already present.
#[test]
fn test_kernel() {
    // Evaluate the call outside the log macro: log macros only evaluate their arguments
    // when the level is enabled, which would make running the code under test depend on
    // the logger configuration. `cargo_sources` initialises logging itself.
    let result = cargo_sources();
    info!("{:?}", result);
    assert!(
        result.is_ok(),
        "cargo sample data could not be prepared: {:?}",
        result
    );
}