1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//! This submodule provides tools related to reading and handling data.
//!
//! Given that the most popular datasets for recommender systems are stored
//! as csv files, that's the only supported format at the moment.
//!
//! # Examples
//!
//! ```ignore
//! use quackin::data::{DefaultRecord, read_records};
//!
//! let records: Vec<DefaultRecord> = read_records("path/to/file", None, true).unwrap();
//! //                                                             ^^^^  ^^^^
//! //                                                             |     |
//! //                                                             |     the file has headers
//! //                                                             use ',' as separator
//! for record in records {
//!     println!("{} {} {}", record.user_id, record.item_id, record.rating);
//! }
//! ```
use csv;
use Hash;
use Decodable;
/// Trait that every record must satisfy.
///
/// This is intended to be used when a dataset has its columns in an order
/// different from that of `BaseRecord`, which is `user_id,item_id,rating`,
/// or when the dataset has more columns.
///
/// It would be cool to add a derive for this trait.
///
/// # Examples
///
/// Let's suppose that we have a dataset with the following columns:
/// `user_id,product_name,rating,timestamp`. We will write a struct for
/// decoding such data.
///
/// ```ignore
/// use rustc_serialize::Decodable;
/// use quackin::data::{Record, read_records};
///
/// #[derive(RustcDecodable)]
/// struct MyRecord {
///     user_id: u32,
///     product_name: String, // fields can have any name
///     rating: f64,
///     timestamp: u64, // we can have additional fields
/// }
///
/// impl Record<u32, String> for MyRecord {
///     fn get_user_id(&self) -> &u32 {
///         &self.user_id
///     }
///     fn get_item_id(&self) -> &String {
///         &self.product_name
///     }
///     fn get_rating(&self) -> f64 {
///         self.rating
///     }
/// }
///
/// // Now we can read the records
/// let my_records: Vec<MyRecord> = read_records("path/to/dataset", None, false).unwrap();
/// ```
/// A record consisting only of a `user_id`, an `item_id` and a `rating`.
/// A `BaseRecord` where `user_id` and `item_id` are of type `String`.
// NOTE(review): the right-hand side of this alias is empty in this view of the
// file, so the line does not parse as written. Going by the doc comment, the
// target is presumably `BaseRecord` instantiated with `String` ids — restore
// the exact type from the original source and confirm its generic parameters.
pub type DefaultRecord = ;
/// Reads a csv file and loads its contents into a `Vec` of records.
///
/// `delimiter` sets the field delimiter used when reading the csv file;
/// if it is `None`, `,` is used as the default. `has_headers` states whether
/// the csv file has headers or not.
///
/// Currently this function assumes that the records are stored in a `struct`
/// that implements `Record`, because there is no way of dynamically setting the
/// number of columns or their order. This needs refinement, but it
/// works.