top_crates/
top-crates.rs

1//! cargo run --release --example top-crates
2//!
3//! Computes the top few most directly depended upon crates.
4
5use db_dump::crates::CrateId;
6use db_dump::versions::VersionId;
7use std::cmp::Reverse;
8use std::collections::btree_map::Entry;
9use std::collections::{BTreeMap as Map, BTreeSet as Set};
10
11const N: usize = 12;
12
13fn main() -> db_dump::Result<()> {
14    // Map of crate id to the most recently published version of that crate.
15    let mut most_recent = Map::new();
16
17    let mut crates = Set::new();
18    let mut crate_owners = Vec::new();
19    let mut dependencies = Vec::new();
20    db_dump::Loader::new()
21        .crates(|row| {
22            crates.insert(row);
23        })
24        .crate_owners(|row| crate_owners.push(row))
25        .dependencies(|row| dependencies.push(row))
26        .versions(|row| match most_recent.entry(row.crate_id) {
27            Entry::Vacant(entry) => {
28                entry.insert(row);
29            }
30            Entry::Occupied(mut entry) => {
31                if row.created_at > entry.get().created_at {
32                    entry.insert(row);
33                }
34            }
35        })
36        .load("./db-dump.tar.gz")?;
37
38    // Set of version ids which are the most recently published of their crate.
39    let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));
40
41    // Set of (version id, dependency crate id) pairs to avoid double-counting
42    // cases where a crate has both a normal dependency and dev-dependency or
43    // build-dependency on the same dependency crate.
44    let mut unique_dependency_edges = Set::<(VersionId, CrateId)>::new();
45
46    // Map of crate id to how many other crates' most recent version depends on
47    // that crate.
48    let mut count = Map::<CrateId, usize>::new();
49    for dep in dependencies {
50        if most_recent.contains(&dep.version_id)
51            && unique_dependency_edges.insert((dep.version_id, dep.crate_id))
52        {
53            *count.entry(dep.crate_id).or_default() += 1;
54        }
55    }
56
57    // Quickselect and sort the top N crates by reverse dependency count.
58    let mut sort = Vec::from_iter(count);
59    let sort_by_count = |&(_crate, count): &_| Reverse(count);
60    sort.select_nth_unstable_by_key(N - 1, sort_by_count);
61    sort[..N].sort_unstable_by_key(sort_by_count);
62
63    for (id, count) in sort.iter().take(N) {
64        let crate_name = &crates.get(id).unwrap().name;
65        println!("{},{}", crate_name, count);
66    }
67
68    Ok(())
69}