1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
//! Built-in datasets for easy testing and experimentation.
use std::env;
use std::fs::{create_dir_all, remove_file, rename, File};
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};

use csv;
use failure;
use rand;
use rand::Rng;
use reqwest;

use data::{Interaction, Interactions};

/// Dataset error types.
///
/// Errors raised by the dataset helpers in this module itself. Failures
/// coming from the filesystem, the HTTP client or the CSV reader are not
/// represented here; the helpers return `failure::Error`, which carries
/// those alongside this type.
#[derive(Debug, Fail)]
pub enum DatasetError {
    /// Can't find the home directory.
    ///
    /// Produced when `env::home_dir()` returns `None` while locating the
    /// directory in which datasets are stored.
    #[fail(display = "Cannot find home directory.")]
    NoHomeDir,
}

/// Return the directory used to store downloaded datasets, creating it
/// if necessary.
///
/// The directory is `.sbr-rs` inside the current user's home directory.
///
/// # Errors
///
/// Returns `DatasetError::NoHomeDir` if the home directory cannot be
/// determined, or the underlying I/O error if the directory cannot be
/// created.
fn create_data_dir() -> Result<PathBuf, failure::Error> {
    let path = env::home_dir()
        .ok_or(DatasetError::NoHomeDir)?
        .join(".sbr-rs");

    // `create_dir_all` succeeds when the directory already exists, so a
    // separate `exists()` check is unnecessary; dropping it also removes
    // the window between checking and creating.
    create_dir_all(&path)?;

    Ok(path)
}

fn download(url: &str, dest_filename: &Path) -> Result<Interactions, failure::Error> {
    let data_dir = create_data_dir()?;
    let desired_filename = data_dir.join(dest_filename);

    if !desired_filename.exists() {
        let temp_filename = env::temp_dir().join(format!(
            "{}",
            rand::thread_rng()
                .sample_iter(&rand::distributions::Alphanumeric)
                .take(10)
                .collect::<String>()
        ));

        let file = File::create(&temp_filename)?;
        let mut writer = BufWriter::new(file);

        let mut response = reqwest::get(url)?;
        response.copy_to(&mut writer)?;

        rename(temp_filename, &desired_filename)?;
    }

    let mut reader = csv::Reader::from_path(desired_filename)?;
    let interactions: Vec<Interaction> = reader.deserialize().collect::<Result<Vec<_>, _>>()?;

    Ok(Interactions::from(interactions))
}

/// Fetch the Movielens 100K dataset and return its interactions.
///
/// The CSV file is kept in `~/.sbr-rs/` and is only downloaded when it is
/// not already present there.
pub fn download_movielens_100k() -> Result<Interactions, failure::Error> {
    let source_url = "https://github.com/maciejkula/sbr-rs/raw/master/data.csv";
    let cache_name = Path::new("movielens_100K.csv");

    download(source_url, cache_name)
}