#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(unused_imports)]
extern crate csv;
extern crate rand;
extern crate sbr;
extern crate wyrm;
#[macro_use]
extern crate serde_derive;
extern crate serde;
extern crate serde_json;
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{BufReader, Read};
use std::collections::HashSet;
use std::time::{Duration, Instant};
use sbr::data::{user_based_split, CompressedInteractions, Interaction, Interactions};
use sbr::evaluation::mrr_score;
use sbr::models::lstm;
/// One row of the Goodbooks ratings CSV, as deserialized by `load_goodbooks`.
///
/// Field names are what serde uses to match against the CSV header, so they
/// must stay in sync with the column names of the input file.
#[derive(Deserialize, Serialize)]
struct GoodbooksInteraction {
// Identifier of the user who produced the rating.
user_id: usize,
// Identifier of the rated book; used as the item id of the interaction.
book_id: usize,
// Rating value from the CSV; parsed but not read by `load_goodbooks`.
rating: usize,
}
/// Loads Goodbooks ratings from the CSV file at `path` and converts them
/// into `Interactions`.
///
/// Each CSV row is deserialized as a `GoodbooksInteraction`; the row's
/// position in the file is passed as the third argument of
/// `Interaction::new` (presumably the timestamp -- confirm against `sbr`).
/// The interactions are then ordered by user id and capped at one million
/// entries.
///
/// # Panics
///
/// Panics if the file cannot be opened or if any row fails to deserialize.
fn load_goodbooks(path: &str) -> Interactions {
    // Upper bound on the number of interactions kept after sorting by user.
    const MAX_INTERACTIONS: usize = 1_000_000;

    let mut reader = csv::Reader::from_path(path).unwrap();
    let mut interactions: Vec<Interaction> = reader
        .deserialize::<GoodbooksInteraction>()
        .map(|x| x.unwrap())
        .enumerate()
        .map(|(i, x)| Interaction::new(x.user_id, x.book_id, i))
        .collect();

    // Stable sort: rows of the same user keep their original file order.
    interactions.sort_by_key(|x| x.user_id());

    // `truncate` is a no-op when the file holds fewer rows than the cap,
    // whereas slicing with `[..1_000_000]` would panic on a short file. It
    // also avoids copying the retained prefix into a second vector.
    interactions.truncate(MAX_INTERACTIONS);
    Interactions::from(interactions)
}
fn load_dummy() -> Interactions {
let num_users = 100;
let num_items = 50;
let mut interactions = Vec::new();
for user in 0..num_users {
for item in 0..num_items {
interactions.push(Interaction::new(user, 1000 + item, item));
}
}
Interactions::from(interactions)
}
/// Outcome of a single hyperparameter trial, persisted as JSON by `main`.
///
/// NOTE(review): this type is named `Result`, which shadows the prelude's
/// `Result<T, E>` for the rest of this module; code here that needs the
/// standard type has to spell it `std::result::Result`.
#[derive(Debug, Serialize, Deserialize)]
struct Result {
// MRR score of the fitted model on the held-out test split; `main` sorts
// stored results by this value.
test_mrr: f32,
// MRR score of the fitted model on the training split.
train_mrr: f32,
// Wall-clock time measured from just before fitting until after both MRR
// scores have been computed.
elapsed: Duration,
// The hyperparameters the model was built from.
hyperparameters: lstm::Hyperparameters,
}
/// Loads interactions from the CSV file at `path` and returns a random
/// sample of at most 100000 of them.
///
/// Rows are deserialized directly into `Interaction` values. When the file
/// holds fewer rows than the sample size, every row is returned (in random
/// order) instead of panicking.
///
/// # Panics
///
/// Panics if the file cannot be opened or if any row fails to deserialize.
fn load_movielens(path: &str) -> Interactions {
    // Maximum number of interactions drawn from the file.
    const SAMPLE_SIZE: usize = 100_000;

    let mut reader = csv::Reader::from_path(path).unwrap();
    let interactions: Vec<Interaction> = reader.deserialize().map(|x| x.unwrap()).collect();

    // `sample_slice` panics when asked for more elements than the slice
    // contains, so clamp the request to what is actually available.
    let amount = std::cmp::min(interactions.len(), SAMPLE_SIZE);
    let interactions = rand::seq::sample_slice(&mut rand::thread_rng(), &interactions, amount);
    Interactions::from(interactions)
}
/// Builds an LSTM model from `hyper` and fits it on `train`.
///
/// Returns the fitted model.
///
/// # Panics
///
/// Panics if fitting reports an error.
fn fit(train: &CompressedInteractions, hyper: lstm::Hyperparameters) -> lstm::ImplicitLSTMModel {
    let mut fitted = hyper.build();
    // Only success matters here; the value returned by `fit` is discarded.
    fitted.fit(train).unwrap();
    fitted
}
fn main() {
let data = load_movielens("data.csv");
let mut rng = rand::thread_rng();
let (train, test) = user_based_split(&data, &mut rng, 0.2);
let train = train.to_compressed();
let test = test.to_compressed();
println!(
"Train {} {} {}",
train.num_users(),
train.num_items(),
data.len()
);
for _ in 0..1000 {
let mut results: Vec<Result> = File::open("lstm_results.json")
.map(|file| serde_json::from_reader(&file).unwrap())
.unwrap_or(Vec::new());
let hyper = lstm::Hyperparameters::random(data.num_items(), &mut rng);
println!("Running {:#?}", &hyper);
let start = Instant::now();
let model = fit(&train, hyper.clone());
let result = Result {
train_mrr: mrr_score(&model, &train).unwrap(),
test_mrr: mrr_score(&model, &test).unwrap(),
elapsed: start.elapsed(),
hyperparameters: hyper,
};
println!("{:#?}", result);
if !result.test_mrr.is_nan() {
results.push(result);
results.sort_by(|a, b| a.test_mrr.partial_cmp(&b.test_mrr).unwrap());
}
println!("Best result: {:#?}", results.last());
File::create("lstm_results.json")
.map(|file| serde_json::to_writer_pretty(&file, &results).unwrap())
.unwrap();
}
}