1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
use std::path::PathBuf;
use std::fs::File;
use std::fs;
use std::io;
use env_logger;
use log::*;
use unzip::Unzipper;
use walkdir::WalkDir;

/// A downloadable sample-data archive.
///
/// Both fields are `'static` string slices, so values are cheap to copy and can
/// be declared as constants.
#[derive(Debug, Clone, Copy)]
pub struct DataSource {
    /// Short identifier; used as the extraction directory name and, with a
    /// `.zip` suffix, as the local archive file name.
    pub name: &'static str,
    /// URL the zip archive is fetched from.
    pub url: &'static str,
}

/// Describes a sample-data set that has been unpacked on disk.
#[derive(Debug)]
pub struct SampleData {
    /// Directory reported as the location of the unpacked data.
    pub root: PathBuf,
    /// Number of entries visited while walking the unpacked tree (the walk is
    /// not restricted to regular files).
    pub num_files: u64,
    /// Sum of the metadata lengths of the visited entries.
    pub size: u64,
    /// Path of the zip archive the data was unpacked from.
    pub archive: PathBuf,
}

impl SampleData {
    /// Deletes the sample data from disk.
    ///
    /// The directory tree at `root` is removed first, then the file at
    /// `archive`.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error encountered. If removing the directory
    /// fails, the archive is not touched.
    pub fn remove(&self) -> io::Result<()> {
        fs::remove_dir_all(&self.root).and_then(|()| fs::remove_file(&self.archive))
    }
}

/// Zip archive of the `torvalds/linux` repository at tag `v5.9`, hosted on GitHub.
const KERNEL: DataSource = DataSource {
    name: "Linux_Kernel",
    url: "https://github.com/torvalds/linux/archive/v5.9.zip",
};


/// Zip archive of the `rust-lang/cargo` repository at tag `0.47.0`, hosted on GitHub.
const CARGO: DataSource = DataSource {
    name: "Cargo_sources",
    url: "https://github.com/rust-lang/cargo/archive/0.47.0.zip",
};


fn download_and_unpack(ds: DataSource) -> Result<SampleData, String> {
    std::fs::create_dir_all(ds.name);

    let archive = format!("{}.zip", ds.name);
    let mut num_files = 0;
    let mut size = 0;


    if !std::path::Path::new(&archive).is_file() {
        info!("Downloading {:?}", ds.url);

        let mut resp = reqwest::blocking::get(ds.url).map_err(|e| format!("{:?}", e))?;
        let mut out = File::create(&archive).map_err(|e| format!("{:?}", e))?;
        std::io::copy(&mut resp, &mut out).map_err(|e| format!("{:?}", e))?;
    } else {
        info!("Did not download, archive already present");
    }

    info!("Unzipping...");

    Unzipper::new(File::open(&archive).unwrap(), ds.name).unzip().map_err(|e| format!("{:?}", e))?;
    info!("Sample data ready. Gathering stats...");

    for entry in WalkDir::new(ds.name).into_iter().filter_map(|e| e.ok()) {
        println!("{}", entry.path().display());
        num_files +=1;
        if let Ok(meta) = entry.metadata() {
            size += meta.len();
        }
    }

    data_path(ds.name).ok_or("Could not get data dir".to_string()).map(|x| SampleData {
        root: x,
        archive: std::path::PathBuf::from(&archive),
        num_files,
        size
    })
}

fn setup() {
    std::env::set_var("RUST_LOG", "INFO");
    let _ = env_logger::builder().try_init();
}


/// Builds the path of `data_dir` relative to the grandparent directory of this
/// source file.
///
/// The base is taken from `file!()`, i.e. the path of this source file as the
/// compiler saw it, with its last two components stripped.
///
/// Returns `None` when that path has fewer than two parent levels.
///
/// NOTE(review): `file!()` is typically a relative path, so the result is
/// presumably only meaningful relative to the directory the build was started
/// from. Confirm against how callers use it.
fn data_path(data_dir: &str) -> Option<PathBuf> {
    PathBuf::from(file!())
        .parent()
        .and_then(|src_dir| src_dir.parent())
        .map(|base| base.join(data_dir))
}


/// Provides the Linux kernel sources as sample data.
///
/// Runs the logging setup, then downloads and unpacks the `KERNEL` data source.
///
/// # Errors
///
/// Returns the error message produced by `download_and_unpack`.
pub fn linux_kernel() -> Result<SampleData, String> {
    setup();
    download_and_unpack(KERNEL)
}

/// Provides the cargo sources as sample data.
///
/// Runs the logging setup, then downloads and unpacks the `CARGO` data source.
///
/// # Errors
///
/// Returns the error message produced by `download_and_unpack`.
pub fn cargo_sources() -> Result<SampleData, String> {
    setup();
    download_and_unpack(CARGO)
}

/// Smoke test: fetches and unpacks the cargo sources and logs the outcome.
///
/// The outcome is only logged, not asserted, so a returned `Err` does not fail
/// the test.
#[test]
fn test_kernel() {
    // `linux_kernel()` can be substituted here to exercise the kernel archive.
    //
    // The call is made outside `info!` on purpose: the `log` macros evaluate
    // their arguments only when the level is enabled, and no logger is
    // installed until `setup()` runs inside `cargo_sources()`. Calling it in
    // the macro's argument list could therefore skip the call entirely.
    let outcome = cargo_sources();
    info!("{:?}", outcome);
}