Function recoreco::indicators

source ·
pub fn indicators<T>(
    interactions: T,
    data_dict: &DataDictionary,
    num_indicators_per_item: usize,
    f_max: u32,
    k_max: u32
) -> SparseBinaryMatrix
where
    T: Iterator<Item = (String, String)>,
Expand description

Compute item indicators from a stream of interactions.

  • interactions - the observed interactions
  • data_dict - a data dictionary which maps string to integer identifiers
  • num_indicators_per_item - the number of highly associated items to compute per item (use 10 as default)
  • f_max - the maximum number of interactions to account for per user (use 500 as default)
  • k_max - the maximum number of interactions to account for per item (use 500 as default)

Examples

Basic usage:

extern crate recoreco;
use recoreco::stats::{DataDictionary, Renaming};
use recoreco::indicators;

// The input is a list of observed interactions between users and items.
// Identifiers are strings of arbitrary length and structure.
let interactions: Vec<(String, String)> = [
    ("alice", "apple"),
    ("alice", "dog"),
    ("alice", "pony"),
    ("bob", "apple"),
    ("bob", "pony"),
    ("charles", "pony"),
    ("charles", "bike"),
]
.iter()
.map(|&(user, item)| (String::from(user), String::from(item)))
.collect();

// recoreco works on consecutive integer ids internally and needs some
// statistics of the data for efficient allocation. A first pass over the
// interactions therefore builds a data dictionary, which maps string
// identifiers to integer identifiers and holds basic statistics of the data.
let data_dict = DataDictionary::from(interactions.iter());

let num_interactions = data_dict.num_interactions();
let num_users = data_dict.num_users();
let num_items = data_dict.num_items();
println!(
    "Found {} interactions between {} users and {} items.",
    num_interactions, num_users, num_items,
);

// A second pass over the interactions computes the indicator matrix from item
// cooccurrences; each entry indicates a highly associated pair of items.
// The three settings below are the defaults suggested for `indicators`.
let num_indicators_per_item = 10;
let f_max = 500;
let k_max = 500;

let indicated_items = indicators(
    interactions.into_iter(),
    &data_dict,
    num_indicators_per_item,
    f_max,
    k_max,
);

// The renaming structure maps the integer ids back to the original string ids.
let renaming = Renaming::from(data_dict);

// Print, for every item, the items that are highly associated with it.
for (row, associated) in indicated_items.iter().enumerate() {
    println!(
        "Items highly associated with {}:",
        renaming.item_name(row as u32)
    );

    for &column in associated.iter() {
        println!("\t{}", renaming.item_name(column as u32));
    }
}