use std::borrow::Borrow;
use std::cell::RefCell;
use std::collections::HashSet;
use std::hash::Hash;
use std::iter::Empty;
use crate::map::Map;
use crate::matrix::{CooMatrix, DenseMatrix, LilMatrix};
use crate::prng::Prng;
#[derive(Clone, Debug)]
pub struct FitInfo {
pub iteration: u32,
pub train_loss: f32,
pub valid_loss: f32,
}
pub struct RecommenderBuilder<'a> {
factors: u32,
iterations: u32,
regularization: Option<f32>,
learning_rate: f32,
alpha: f32,
callback: Option<Box<dyn 'a + Fn(FitInfo)>>,
seed: Option<u64>,
}
impl<'a> RecommenderBuilder<'a> {
pub fn new() -> Self {
Self {
factors: 8,
iterations: 20,
regularization: None,
learning_rate: 0.1,
alpha: 40.0,
callback: None,
seed: None,
}
}
pub fn factors(&mut self, value: u32) -> &mut Self {
self.factors = value;
self
}
pub fn iterations(&mut self, value: u32) -> &mut Self {
self.iterations = value;
self
}
pub fn regularization(&mut self, value: f32) -> &mut Self {
self.regularization = Some(value);
self
}
pub fn learning_rate(&mut self, value: f32) -> &mut Self {
self.learning_rate = value;
self
}
pub fn alpha(&mut self, value: f32) -> &mut Self {
self.alpha = value;
self
}
pub fn callback<C: 'a + Fn(FitInfo)>(&mut self, value: C) -> &mut Self {
self.callback = Some(Box::new(value));
self
}
pub fn seed(&mut self, value: u64) -> &mut Self {
self.seed = Some(value);
self
}
pub fn fit_explicit<T, U, I>(&self, train_set: I) -> Recommender<T, U>
where
T: Clone + Eq + Hash,
U: Clone + Eq + Hash,
I: IntoIterator,
I::Item: Borrow<(T, U, f32)>,
{
self.fit(train_set, None::<Empty<(T, U, f32)>>, false)
}
pub fn fit_implicit<T, U, I>(&self, train_set: I) -> Recommender<T, U>
where
T: Clone + Eq + Hash,
U: Clone + Eq + Hash,
I: IntoIterator,
I::Item: Borrow<(T, U, f32)>,
{
self.fit(train_set, None::<Empty<(T, U, f32)>>, true)
}
pub fn fit_eval_explicit<T, U, I, J>(&self, train_set: I, valid_set: J) -> Recommender<T, U>
where
T: Clone + Eq + Hash,
U: Clone + Eq + Hash,
I: IntoIterator,
I::Item: Borrow<(T, U, f32)>,
J: IntoIterator,
J::Item: Borrow<(T, U, f32)>,
{
self.fit(train_set, Some(valid_set), false)
}
fn fit<T, U, I, J>(
&self,
train_set: I,
valid_set: Option<J>,
implicit: bool,
) -> Recommender<T, U>
where
T: Clone + Eq + Hash,
U: Clone + Eq + Hash,
I: IntoIterator,
I::Item: Borrow<(T, U, f32)>,
J: IntoIterator,
J::Item: Borrow<(T, U, f32)>,
{
let train_set = train_set.into_iter();
let valid_set = valid_set.map(|v| v.into_iter());
let mut user_map = Map::new();
let mut item_map = Map::new();
let mut rated = Vec::new();
let capacity = if implicit { 0 } else { train_set.size_hint().0 };
let mut train_data = CooMatrix::with_capacity(capacity);
let mut sum = 0.0;
let mut cui = LilMatrix::new();
let mut ciu = LilMatrix::new();
for item in train_set {
let (user_id, item_id, value) = item.borrow();
let u = user_map.add(user_id.clone());
let i = item_map.add(item_id.clone());
if implicit {
let confidence = 1.0 + self.alpha * (*value);
cui.push(u, i, confidence);
ciu.push(i, u, confidence);
} else {
train_data.push(u, i, *value);
sum += *value;
}
if u == rated.len() {
rated.push(HashSet::new());
}
rated[u].insert(i);
}
let valid_data = valid_set.map(|vs| {
let mut valid_data = CooMatrix::with_capacity(vs.size_hint().0);
for item in vs {
let (user_id, item_id, value) = item.borrow();
valid_data.push(
user_map.get(user_id).copied().unwrap_or(usize::MAX),
item_map.get(item_id).copied().unwrap_or(usize::MAX),
*value,
)
}
valid_data
});
let global_mean = if implicit {
0.0
} else {
sum / train_data.len() as f32
};
let users = user_map.len();
let items = item_map.len();
let factors = self.factors as usize;
let mut prng = match self.seed {
Some(s) => Prng::from_seed(s),
None => Prng::new(),
};
let end_range = if implicit { 0.01 } else { 0.1 };
let user_factors = create_factors(users, factors, &mut prng, end_range);
let item_factors = create_factors(items, factors, &mut prng, end_range);
let mut recommender = Recommender {
user_map,
item_map,
rated,
global_mean,
user_factors,
item_factors,
user_norms: RefCell::new(None),
item_norms: RefCell::new(None),
};
if implicit {
let regularization = self.regularization.unwrap_or(0.01);
for iteration in 0..self.iterations {
least_squares_cg(
&cui,
&mut recommender.user_factors,
&recommender.item_factors,
regularization,
);
least_squares_cg(
&ciu,
&mut recommender.item_factors,
&recommender.user_factors,
regularization,
);
if let Some(callback) = &self.callback {
let info = FitInfo {
iteration: iteration + 1,
train_loss: f32::NAN,
valid_loss: f32::NAN,
};
(callback)(info);
}
}
} else {
let learning_rate = self.learning_rate;
let lambda = self.regularization.unwrap_or(0.1);
let k = factors;
let ks = ((k as f32 * 0.08).round() as usize).max(1);
let mut g_slow: Vec<f32> = vec![1.0; users];
let mut g_fast: Vec<f32> = vec![1.0; users];
let mut h_slow: Vec<f32> = vec![1.0; items];
let mut h_fast: Vec<f32> = vec![1.0; items];
for iteration in 0..self.iterations {
let mut train_loss = 0.0;
for j in sample(&mut prng, train_data.len()) {
let (u, v, r) = train_data.get(j);
let pu = recommender.user_factors.row_mut(u);
let qv = recommender.item_factors.row_mut(v);
let e = r - dot(pu, qv);
let mut g_hat = 0.0;
let mut h_hat = 0.0;
let nu = learning_rate * g_slow[u].sqrt().recip();
let nv = learning_rate * h_slow[v].sqrt().recip();
for d in 0..ks {
let gud = -e * qv[d] + lambda * pu[d];
let hvd = -e * pu[d] + lambda * qv[d];
g_hat += gud * gud;
h_hat += hvd * hvd;
pu[d] -= nu * gud;
qv[d] -= nv * hvd;
}
g_slow[u] += g_hat / ks as f32;
h_slow[v] += h_hat / ks as f32;
if iteration > 0 {
let mut g_hat = 0.0;
let mut h_hat = 0.0;
let nu = learning_rate * g_fast[u].sqrt().recip();
let nv = learning_rate * h_fast[v].sqrt().recip();
for d in ks..k {
let gud = -e * qv[d] + lambda * pu[d];
let hvd = -e * pu[d] + lambda * qv[d];
g_hat += gud * gud;
h_hat += hvd * hvd;
pu[d] -= nu * gud;
qv[d] -= nv * hvd;
}
g_fast[u] += g_hat / (k - ks) as f32;
h_fast[v] += h_hat / (k - ks) as f32;
}
train_loss += e * e;
}
if let Some(callback) = &self.callback {
train_loss = (train_loss / train_data.len() as f32).sqrt();
let valid_loss = match &valid_data {
Some(m) => rmse(m.into_iter().map(|((u, i), v)| {
let user_index = if *u != usize::MAX { Some(u) } else { None };
let item_index = if *i != usize::MAX { Some(i) } else { None };
(*v, recommender.inner_predict(user_index, item_index))
})),
None => f32::NAN,
};
let info = FitInfo {
iteration: iteration + 1,
train_loss,
valid_loss,
};
(callback)(info);
}
}
}
recommender
}
}
impl Default for RecommenderBuilder<'_> {
fn default() -> Self {
Self::new()
}
}
pub struct Recommender<T, U> {
user_map: Map<T>,
item_map: Map<U>,
rated: Vec<HashSet<usize>>,
global_mean: f32,
user_factors: DenseMatrix,
item_factors: DenseMatrix,
user_norms: RefCell<Option<Vec<f32>>>,
item_norms: RefCell<Option<Vec<f32>>>,
}
impl<T: Clone + Eq + Hash, U: Clone + Eq + Hash> Recommender<T, U> {
pub fn fit_explicit<I>(train_set: I) -> Recommender<T, U>
where
I: IntoIterator,
I::Item: Borrow<(T, U, f32)>,
{
RecommenderBuilder::new().fit_explicit(train_set)
}
pub fn fit_implicit<I>(train_set: I) -> Recommender<T, U>
where
I: IntoIterator,
I::Item: Borrow<(T, U, f32)>,
{
RecommenderBuilder::new().fit_implicit(train_set)
}
pub fn predict(&self, user_id: &T, item_id: &U) -> f32 {
self.inner_predict(self.user_map.get(user_id), self.item_map.get(item_id))
}
pub fn user_recs(&self, user_id: &T, count: usize) -> Vec<(&U, f32)> {
let i = match self.user_map.get(user_id) {
Some(o) => *o,
None => return Vec::new(),
};
let rated = &self.rated[i];
let predictions = self.item_factors.dot(self.user_factors.row(i));
let mut predictions: Vec<_> = predictions.iter().enumerate().collect();
predictions.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap());
predictions
.iter()
.filter(|v| !rated.contains(&v.0))
.map(|v| (self.item_map.lookup(v.0), *v.1))
.take(count)
.collect()
}
pub fn item_recs(&self, item_id: &U, count: usize) -> Vec<(&U, f32)> {
similar(
&self.item_map,
&self.item_factors,
self.item_norms(),
item_id,
count,
)
}
pub fn similar_users(&self, user_id: &T, count: usize) -> Vec<(&T, f32)> {
similar(
&self.user_map,
&self.user_factors,
self.user_norms(),
user_id,
count,
)
}
fn user_norms(&self) -> &RefCell<Option<Vec<f32>>> {
self.user_norms
.borrow_mut()
.get_or_insert_with(|| norms(&self.user_factors));
&self.user_norms
}
fn item_norms(&self) -> &RefCell<Option<Vec<f32>>> {
self.item_norms
.borrow_mut()
.get_or_insert_with(|| norms(&self.item_factors));
&self.item_norms
}
pub fn user_ids(&self) -> &[T] {
self.user_map.ids()
}
pub fn item_ids(&self) -> &[U] {
self.item_map.ids()
}
pub fn user_factors(&self, user_id: &T) -> Option<&[f32]> {
self.user_map
.get(user_id)
.map(|o| self.user_factors.row(*o))
}
pub fn item_factors(&self, item_id: &U) -> Option<&[f32]> {
self.item_map
.get(item_id)
.map(|o| self.item_factors.row(*o))
}
pub fn global_mean(&self) -> f32 {
self.global_mean
}
pub fn rmse<I>(&self, data: I) -> f32
where
I: IntoIterator,
I::Item: Borrow<(T, U, f32)>,
{
rmse(data.into_iter().map(|item| {
let (user_id, item_id, value) = item.borrow();
(*value, self.predict(user_id, item_id))
}))
}
fn inner_predict(&self, user_index: Option<&usize>, item_index: Option<&usize>) -> f32 {
match (user_index, item_index) {
(Some(i), Some(j)) => dot(self.user_factors.row(*i), self.item_factors.row(*j)),
_ => self.global_mean,
}
}
}
fn rmse<I>(data: I) -> f32
where
I: IntoIterator<Item = (f32, f32)>,
{
let mut sum = 0.0;
let mut count = 0;
for (v, v2) in data {
let error = v - v2;
sum += error * error;
count += 1;
}
(sum / count as f32).sqrt()
}
fn least_squares_cg(cui: &LilMatrix, x: &mut DenseMatrix, y: &DenseMatrix, regularization: f32) {
let cg_steps = 3;
let factors = x.cols;
let mut yty = DenseMatrix::new(factors, factors);
for (i, row) in &mut yty.rows_mut().enumerate() {
for (j, v) in &mut row.iter_mut().enumerate() {
*v = y.rows().map(|r| r[i] * r[j]).sum();
}
row[i] += regularization;
}
for (u, row_vec) in cui.into_iter().enumerate() {
let xi = x.row_mut(u);
let mut r = yty.dot(xi);
neg(&mut r);
for (i, confidence) in row_vec {
scaled_add(
&mut r,
confidence - (confidence - 1.0) * dot(y.row(*i), xi),
y.row(*i),
);
}
let mut p = r.clone();
let mut rsold = dot(&r, &r);
for _ in 0..cg_steps {
let mut ap = yty.dot(&p);
for (i, confidence) in row_vec {
scaled_add(&mut ap, (confidence - 1.0) * dot(y.row(*i), &p), y.row(*i));
}
let alpha = rsold / dot(&p, &ap);
scaled_add(xi, alpha, &p);
scaled_add(&mut r, -alpha, &ap);
let rsnew = dot(&r, &r);
if rsnew < 1e-20 {
break;
}
let rs = rsnew / rsold;
for (pi, ri) in p.iter_mut().zip(&r) {
*pi = ri + rs * (*pi);
}
rsold = rsnew;
}
}
}
fn create_factors(rows: usize, cols: usize, prng: &mut Prng, end_range: f32) -> DenseMatrix {
let mut m = DenseMatrix::new(rows, cols);
for row in &mut m.rows_mut() {
for v in row {
*v = (prng.next() as f32) * end_range;
}
}
m
}
fn similar<'a, T: Clone + Eq + Hash>(
map: &'a Map<T>,
factors: &DenseMatrix,
norms: &RefCell<Option<Vec<f32>>>,
id: &T,
count: usize,
) -> Vec<(&'a T, f32)> {
let i = match map.get(id) {
Some(o) => *o,
None => return Vec::new(),
};
let borrowed = norms.borrow();
let norms = borrowed.as_ref().unwrap();
let norm = norms[i];
let predictions: Vec<_> = factors
.dot(factors.row(i))
.iter()
.zip(norms)
.map(|(v, n)| v / (norm * n).max(f32::EPSILON))
.collect();
let mut predictions: Vec<_> = predictions.iter().enumerate().collect();
predictions.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap());
predictions
.iter()
.filter(|v| v.0 != i)
.map(|v| (map.lookup(v.0), *v.1))
.take(count)
.collect()
}
fn norms(factors: &DenseMatrix) -> Vec<f32> {
factors
.rows()
.map(|row| row.iter().map(|v| v * v).sum::<f32>().sqrt())
.collect()
}
fn dot(a: &[f32], b: &[f32]) -> f32 {
a.iter().zip(b).map(|(ai, bi)| ai * bi).sum()
}
fn scaled_add(x: &mut [f32], a: f32, v: &[f32]) {
for (xi, vi) in x.iter_mut().zip(v) {
*xi += a * vi;
}
}
fn neg(x: &mut [f32]) {
for v in x {
*v = -(*v);
}
}
fn sample(prng: &mut Prng, n: usize) -> Vec<usize> {
let mut v: Vec<usize> = (0..n).collect();
for i in (1..n).rev() {
let j = (prng.next() * (i as f64 + 1.0)) as usize;
(v[i], v[j]) = (v[j], v[i]);
}
v
}