industry_coefficient/
industry-coefficient.rs

//! $ cargo run --release --example industry-coefficient
//!
//! Computes a coefficient for each crate that is:
//!   • HIGH if the crate is disproportionately often downloaded on weekDAYS,
//!   • and LOW if the crate is disproportionately often downloaded on weekENDS.
6
7use chrono::{Datelike, TimeDelta, Weekday};
8use db_dump::crates::CrateId;
9use db_dump::versions::VersionId;
10use std::collections::BTreeMap as Map;
11
/// Minimum combined weekday + weekend download count, within the sampled
/// window, that a crate needs in order to be listed in the output. The crate
/// named "cxx" is exempt from this threshold.
const DOWNLOADS_CUTOFF: u64 = 1_000_000;
13
/// Download totals split by the part of the week in which the downloads were
/// recorded. Used both per crate and for the grand total across all crates.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct Downloads {
    /// Downloads recorded on a Tuesday, Wednesday, or Thursday.
    weekday: u64,
    /// Downloads recorded on a Saturday or Sunday.
    weekend: u64,
}
19
20fn main() -> db_dump::Result<()> {
21    let mut crates: Map<CrateId, String> = Map::new();
22    let mut versions: Map<VersionId, CrateId> = Map::new();
23    let mut version_downloads = Vec::new();
24    db_dump::Loader::new()
25        .crates(|row| {
26            crates.insert(row.id, row.name);
27        })
28        .versions(|row| {
29            versions.insert(row.id, row.crate_id);
30        })
31        .version_downloads(|row| version_downloads.push(row))
32        .load("./db-dump.tar.gz")?;
33
34    let max_date = version_downloads.iter().map(|row| row.date).max().unwrap();
35    let start_date = max_date - TimeDelta::try_weeks(6).unwrap();
36
37    // Add up downloads by crate by date
38    let mut downloads: Map<CrateId, Downloads> = Map::new();
39    for row in version_downloads {
40        // Deliberately cut out the largest date in the db-dump, because the
41        // data is partial.
42        if row.date >= start_date && row.date < max_date {
43            let crate_id = versions[&row.version_id];
44            let downloads = downloads.entry(crate_id).or_insert_with(Downloads::default);
45            match row.date.weekday() {
46                Weekday::Tue | Weekday::Wed | Weekday::Thu => downloads.weekday += row.downloads,
47                Weekday::Sat | Weekday::Sun => downloads.weekend += row.downloads,
48                // Disregard these to reduce some boundary effect from
49                // downloaders not being perfectly aligned with UTC.
50                Weekday::Mon | Weekday::Fri => {}
51            }
52        }
53    }
54
55    let mut downloads_vec = Vec::new();
56    let mut total = Downloads::default();
57    for (crate_id, downloads) in downloads {
58        total.weekday += downloads.weekday;
59        total.weekend += downloads.weekend;
60        let crate_name = &crates[&crate_id];
61        if downloads.weekend > 0
62            && (downloads.weekday + downloads.weekend >= DOWNLOADS_CUTOFF || crate_name == "cxx")
63        {
64            let coefficient = downloads.weekday as f64 / downloads.weekend as f64;
65            downloads_vec.push((crate_name, coefficient));
66        }
67    }
68
69    let mean = total.weekday as f64 / total.weekend as f64;
70    downloads_vec.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
71
72    for (crate_name, coefficient) in downloads_vec {
73        println!("{:>36}  {:+.4}", crate_name, coefficient - mean);
74    }
75
76    Ok(())
77}