use chrono::NaiveDateTime;
use rayon::prelude::*;
use std::sync::Arc;
use crate::sessions::RocksDBSessionStore;
use crate::vmisknn::vmis_index::VMISIndex;
/// Bundle of shared handles and configuration values.
///
/// The two handles are reference-counted (`Arc`), so they can be shared between several
/// owners. The remaining fields are plain numeric settings and one boolean flag.
///
/// NOTE(review): this definition does not show where the fields are read, so the
/// per-field descriptions below are inferred from the field names and types only.
/// Confirm them against the code that consumes this struct.
pub struct SharedHandlesAndConfig {
    /// Shared handle to the `RocksDBSessionStore`.
    pub session_store: Arc<RocksDBSessionStore>,
    /// Shared handle to the `VMISIndex`.
    pub vmis_index: Arc<VMISIndex>,
    /// Presumably how many of the most recent sessions are considered (the "m") — confirm.
    pub m_most_recent_sessions: usize,
    /// Presumably the neighborhood size (the "k") — confirm.
    pub neighborhood_size_k: usize,
    /// Presumably the number of items in a recommendation result — confirm.
    pub num_items_to_recommend: usize,
    /// Presumably an upper bound on the items kept per session — confirm.
    pub max_items_in_session: usize,
    /// Presumably the number of workers to run — confirm.
    pub qty_workers: usize,
    /// Presumably a time-to-live in seconds related to database compaction — confirm.
    pub db_compaction_ttl_in_secs: usize,
    /// Presumably switches additional business logic on or off — confirm.
    pub enable_business_logic: bool,
    /// Presumably an item id used as an example/sample value — confirm.
    pub example_item_id: usize,
}
/// Summary of a training data set, as produced by `determine_training_data_statistics`.
///
/// The counts and the time span are computed from the data. The percentile fields are
/// currently filled with hardcoded constants by `determine_training_data_statistics`
/// rather than derived from the data.
pub struct TrainingDataStats {
    /// Label of the data set, copied from the caller-supplied name.
    pub descriptive_name: String,
    /// Total number of records in the data set.
    pub qty_records: usize,
    /// Number of distinct session ids.
    pub qty_unique_session_ids: usize,
    /// Number of distinct item ids.
    pub qty_unique_item_ids: usize,
    /// Smallest record timestamp, converted from seconds since the Unix epoch.
    pub min_time_date_time: NaiveDateTime,
    /// Largest record timestamp, converted from seconds since the Unix epoch.
    pub max_time_date_time: NaiveDateTime,

    // Session duration percentiles; reported in seconds.
    /// 5th percentile of session duration.
    pub session_duration_p05: u64,
    /// 25th percentile of session duration.
    pub session_duration_p25: u64,
    /// 50th percentile of session duration.
    pub session_duration_p50: u64,
    /// 75th percentile of session duration.
    pub session_duration_p75: u64,
    /// 90th percentile of session duration.
    pub session_duration_p90: u64,
    /// 95th percentile of session duration.
    pub session_duration_p95: u64,
    /// 99th percentile of session duration.
    pub session_duration_p99: u64,
    /// 99.5th percentile of session duration.
    pub session_duration_p99_5: u64,
    /// 100th percentile of session duration.
    pub session_duration_p100: u64,

    // Percentiles of the quantity of events per session.
    /// 5th percentile of the event quantity.
    pub qty_events_p05: u64,
    /// 25th percentile of the event quantity.
    pub qty_events_p25: u64,
    /// 50th percentile of the event quantity.
    pub qty_events_p50: u64,
    /// 75th percentile of the event quantity.
    pub qty_events_p75: u64,
    /// 90th percentile of the event quantity.
    pub qty_events_p90: u64,
    /// 95th percentile of the event quantity.
    pub qty_events_p95: u64,
    /// 99th percentile of the event quantity.
    pub qty_events_p99: u64,
    /// 99.5th percentile of the event quantity.
    pub qty_events_p99_5: u64,
    /// 100th percentile of the event quantity.
    pub qty_events_p100: u64,
}
/// Summarises a training data set, prints the summary to stdout and returns it.
///
/// Every record in `training_data` is read as `(session_id, item_id, time)`. `time` is
/// converted with `NaiveDateTime::from_timestamp`, i.e. treated as whole seconds since
/// the Unix epoch.
///
/// Computed from the data: the record count, the number of distinct session ids, the
/// number of distinct item ids, and the smallest and largest timestamp. The session
/// duration and qty-event percentiles are NOT derived from `training_data`; they are
/// fixed constants, which the function announces on stdout.
///
/// `descriptive_name` is used as a label only: it is printed and copied into the result.
///
/// # Panics
///
/// Panics when `training_data` is empty, because there is no minimum or maximum
/// timestamp to unwrap. The panic happens before anything is printed.
pub fn determine_training_data_statistics(
    descriptive_name: &str,
    training_data: &[(u32, u64, u32)],
) -> TrainingDataStats {
    // Distinct session ids: project, sort in parallel, then collapse adjacent duplicates.
    let qty_unique_session_ids = {
        let mut ids: Vec<u32> = training_data
            .par_iter()
            .map(|&(session_id, _, _)| session_id)
            .collect();
        ids.par_sort_unstable();
        ids.dedup();
        ids.len()
    };

    // Same approach for the distinct item ids.
    let qty_unique_item_ids = {
        let mut ids: Vec<u64> = training_data
            .par_iter()
            .map(|&(_, item_id, _)| item_id)
            .collect();
        ids.par_sort_unstable();
        ids.dedup();
        ids.len()
    };

    // `min`/`max` yield `None` for an empty slice, so these unwraps are the panic site.
    let earliest: u32 = training_data
        .par_iter()
        .map(|&(_, _, time)| time)
        .min()
        .unwrap();
    let latest: u32 = training_data
        .par_iter()
        .map(|&(_, _, time)| time)
        .max()
        .unwrap();

    println!("Using hardcoded session duration percentiles.");
    println!("Using hardcoded qty event percentiles.");

    let summary = TrainingDataStats {
        descriptive_name: descriptive_name.to_string(),
        qty_records: training_data.len(),
        qty_unique_session_ids,
        qty_unique_item_ids,
        // u32 -> i64 is lossless, so `From` is used instead of an `as` cast.
        min_time_date_time: NaiveDateTime::from_timestamp(i64::from(earliest), 0),
        max_time_date_time: NaiveDateTime::from_timestamp(i64::from(latest), 0),
        // Fixed session duration percentiles (printed below as seconds).
        session_duration_p05: 14,
        session_duration_p25: 77,
        session_duration_p50: 248,
        session_duration_p75: 681,
        session_duration_p90: 1316,
        session_duration_p95: 1862,
        session_duration_p99: 3359,
        session_duration_p99_5: 4087,
        session_duration_p100: 539_931,
        // Fixed qty-event percentiles.
        qty_events_p05: 2,
        qty_events_p25: 2,
        qty_events_p50: 3,
        qty_events_p75: 6,
        qty_events_p90: 10,
        qty_events_p95: 14,
        qty_events_p99: 27,
        qty_events_p99_5: 34,
        qty_events_p100: 9408,
    };

    println!("Loaded {}", descriptive_name);
    println!("\tEvents: {}", summary.qty_records);
    println!("\tSessions: {}", summary.qty_unique_session_ids);
    println!("\tItems: {}", summary.qty_unique_item_ids);
    println!(
        "\tSpan: {} / {}",
        summary.min_time_date_time, summary.max_time_date_time
    );

    print!("\tSession duration percentiles (secs): ");
    print!(" p5={}", summary.session_duration_p05);
    print!(" p25={}", summary.session_duration_p25);
    print!(" p50={}", summary.session_duration_p50);
    print!(" p75={}", summary.session_duration_p75);
    print!(" p90={}", summary.session_duration_p90);
    print!(" p95={}", summary.session_duration_p95);
    print!(" p99={}", summary.session_duration_p99);
    print!(" p99.5={}", summary.session_duration_p99_5);
    println!(" p100={}", summary.session_duration_p100);

    print!("\tSession qty event percentiles: ");
    print!(" p5={}", summary.qty_events_p05);
    print!(" p25={}", summary.qty_events_p25);
    print!(" p50={}", summary.qty_events_p50);
    print!(" p75={}", summary.qty_events_p75);
    print!(" p90={}", summary.qty_events_p90);
    print!(" p95={}", summary.qty_events_p95);
    print!(" p99={}", summary.qty_events_p99);
    print!(" p99.5={}", summary.qty_events_p99_5);
    println!(" p100={}", summary.qty_events_p100);

    summary
}