pub struct Loader<'a> { /* private fields */ }
Expand description
Perform a streaming load of only relevant database tables.
# Example
This example loads just the `version_downloads.csv` table, in which each row is the download count for a single version of a single crate on a single day. Rather than storing the rows individually in memory, we stream them from the CSV and accumulate only a total count per day across all crates, which requires far less memory.
use chrono::Utc;
use db_dump::Date;
use std::collections::BTreeMap as Map;
/// Streams the `version_downloads` table out of the dump and prints one
/// `date,count` line per day, summed across every version of every crate.
fn main() -> db_dump::Result<()> {
    // Per-day download total, accumulated while rows stream past so that no
    // individual row has to be kept around.
    let mut totals: Map<Date<Utc>, u64> = Map::new();

    db_dump::Loader::new()
        .version_downloads(|record| {
            let day_total = totals.entry(record.date).or_default();
            *day_total += record.downloads;
        })
        .load("./db-dump.tar.gz")?;

    // Emit the accumulated totals in the map's iteration order.
    totals
        .iter()
        .for_each(|(day, total)| println!("{},{}", day, total));

    Ok(())
}
Implementations§
impl<'a> Loader<'a>
pub fn new() -> Self
Examples found in repository?
examples/total-downloads.rs (line 12)
10 11 12 13 14 15 16 17 18 19 20 21 22 23
/// Streams the `version_downloads` table out of the dump and prints one
/// `date,count` line per day, summed across every version of every crate.
fn main() -> db_dump::Result<()> {
    // Per-day download total, accumulated while rows stream past so that no
    // individual row has to be kept around.
    let mut totals: Map<Date<Utc>, u64> = Map::new();

    db_dump::Loader::new()
        .version_downloads(|record| {
            let day_total = totals.entry(record.date).or_default();
            *day_total += record.downloads;
        })
        .load("./db-dump.tar.gz")?;

    // Emit the accumulated totals in the map's iteration order.
    totals
        .iter()
        .for_each(|(day, total)| println!("{},{}", day, total));

    Ok(())
}
More examples
examples/crate-downloads.rs (line 15)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
fn main() -> db_dump::Result<()> {
let mut crate_id = None;
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.crates(|row| {
if row.name == CRATE {
crate_id = Some(row.id);
}
})
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// Crate id of the crate we care about.
let crate_id = crate_id.expect("no such crate");
// Set of all version ids corresponding to that crate.
let mut version_ids = Set::new();
for version in versions {
if version.crate_id == crate_id {
version_ids.insert(version.id);
}
}
// Add up downloads across all version of the crate by day.
let mut downloads = Map::<Date<Utc>, u64>::new();
for stat in version_downloads {
if version_ids.contains(&stat.version_id) {
*downloads.entry(stat.date).or_default() += stat.downloads;
}
}
for (date, count) in downloads {
println!("{},{}", date, count);
}
Ok(())
}
examples/user-downloads.rs (line 23)
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids of all of those crates.
let mut their_versions = Set::new();
for version in versions {
if their_crates.contains(&version.crate_id) {
their_versions.insert(version.id);
}
}
// Add up downloads across that user's crates, as well as total downloads of
// all crates.
let mut downloads = Map::<Date<Utc>, Downloads>::new();
for stat in version_downloads {
let entry = downloads.entry(stat.date).or_default();
entry.all += stat.downloads;
if their_versions.contains(&stat.version_id) {
entry.theirs += stat.downloads;
}
}
// Print user's downloads as a fraction of total crates.io downloads by day.
for (date, downloads) in downloads {
if downloads.theirs > 0 {
println!(
"{},{}",
date,
downloads.theirs as f64 / downloads.all as f64,
);
}
}
Ok(())
}
examples/user-dependencies.rs (line 19)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
fn main() -> db_dump::Result<()> {
// Map of crate id to the most recently published version of that crate.
let mut most_recent = Map::new();
let mut user_id = None;
let mut crates = 0;
let mut crate_owners = Vec::new();
let mut dependencies = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crates(|_row| crates += 1)
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| dependencies.push(row))
.versions(|row| match most_recent.entry(row.crate_id) {
Entry::Vacant(entry) => {
entry.insert(row);
}
Entry::Occupied(mut entry) => {
if row.created_at > entry.get().created_at {
entry.insert(row);
}
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids which are the most recently published of their crate.
let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));
// Set of version ids which depend directly on at least one crate by the
// user.
let mut dep_on_them = Set::new();
for dep in dependencies {
if their_crates.contains(&dep.crate_id) {
dep_on_them.insert(dep.version_id);
}
}
// Number of crates whose most recent version depends on at least one crate
// by the user.
let result = dep_on_them.intersection(&most_recent).count();
println!(
"{} / {} = {:.1}%",
result,
crates,
100.0 * result as f64 / crates as f64,
);
Ok(())
}
examples/top-crates.rs (line 20)
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/// Prints the (at most) `N` crates with the most reverse dependencies, one
/// `name,count` line each, counting only dependencies declared by the most
/// recent version of each dependent crate.
fn main() -> db_dump::Result<()> {
    // Map of crate id to the most recently published version of that crate.
    let mut most_recent = Map::new();
    let mut crates = Set::new();
    // NOTE(review): these rows are collected but never read below.
    let mut crate_owners = Vec::new();
    let mut dependencies = Vec::new();
    db_dump::Loader::new()
        .crates(|row| {
            crates.insert(row);
        })
        .crate_owners(|row| crate_owners.push(row))
        .dependencies(|row| dependencies.push(row))
        .versions(|row| match most_recent.entry(row.crate_id) {
            Entry::Vacant(entry) => {
                entry.insert(row);
            }
            Entry::Occupied(mut entry) => {
                if row.created_at > entry.get().created_at {
                    entry.insert(row);
                }
            }
        })
        .load("./db-dump.tar.gz")?;

    // Set of version ids which are the most recently published of their crate.
    let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));

    // Set of (version id, dependency crate id) pairs to avoid double-counting
    // cases where a crate has both a normal dependency and dev-dependency or
    // build-dependency on the same dependency crate.
    let mut unique_dependency_edges = Set::<(VersionId, CrateId)>::new();

    // Map of crate id to how many other crates' most recent version depends on
    // that crate.
    let mut count = Map::<CrateId, usize>::new();
    for dep in dependencies {
        if most_recent.contains(&dep.version_id)
            && unique_dependency_edges.insert((dep.version_id, dep.crate_id))
        {
            *count.entry(dep.crate_id).or_default() += 1;
        }
    }

    // Quickselect and sort the top N crates by reverse dependency count.
    let mut sort = Vec::from_iter(count);
    let sort_by_count = |&(_crate, count): &_| Reverse(count);
    // Clamp to the number of entries actually available: selecting index
    // `N - 1` or slicing `..N` would panic when fewer than `N` crates have any
    // reverse dependency, and `N - 1` would underflow for `N == 0`.
    let top = usize::min(N, sort.len());
    if top > 0 {
        sort.select_nth_unstable_by_key(top - 1, sort_by_count);
        sort[..top].sort_unstable_by_key(sort_by_count);
    }
    for (id, count) in &sort[..top] {
        let crate_name = &crates
            .get(id)
            .expect("dependency refers to a crate id missing from the crates table")
            .name;
        println!("{},{}", crate_name, count);
    }
    Ok(())
}
examples/user-dependencies-graph.rs (line 15)
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut dependencies = Map::new();
let mut versions = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| {
dependencies
.entry(row.version_id)
.or_insert_with(Vec::new)
.push(row);
})
.versions(|row| {
if !row.yanked {
versions.push(row);
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
let mut total_deps = 0usize;
let mut their_deps = 0usize;
let mut last_printed_ratio = 0.0..=0.0;
let mut latest_version = Map::new();
versions.sort_by_key(|v| v.created_at);
for version in versions {
let no_deps = Vec::new();
if let Some(prev) = latest_version.insert(version.crate_id, version.id) {
for dep in dependencies.get(&prev).unwrap_or(&no_deps) {
total_deps -= 1;
their_deps -= their_crates.contains(&dep.crate_id) as usize;
}
}
for dep in dependencies.get(&version.id).unwrap_or(&no_deps) {
total_deps += 1;
their_deps += their_crates.contains(&dep.crate_id) as usize;
}
if total_deps != 0 {
let ratio = their_deps as f64 / total_deps as f64;
if !last_printed_ratio.contains(&ratio) {
println!("{},{:.3}", version.created_at.naive_utc(), ratio * 100.0);
last_printed_ratio = ratio * 0.99999..=ratio * 1.00001;
}
}
}
eprintln!(
"{} / {} ({:.02}%)",
their_deps,
total_deps,
(their_deps as f64 / total_deps as f64) * 100.0,
);
Ok(())
}
Additional examples can be found in:
pub fn categories(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn crate_downloads(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn crate_owners(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
Examples found in repository?
examples/user-downloads.rs (line 29)
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids of all of those crates.
let mut their_versions = Set::new();
for version in versions {
if their_crates.contains(&version.crate_id) {
their_versions.insert(version.id);
}
}
// Add up downloads across that user's crates, as well as total downloads of
// all crates.
let mut downloads = Map::<Date<Utc>, Downloads>::new();
for stat in version_downloads {
let entry = downloads.entry(stat.date).or_default();
entry.all += stat.downloads;
if their_versions.contains(&stat.version_id) {
entry.theirs += stat.downloads;
}
}
// Print user's downloads as a fraction of total crates.io downloads by day.
for (date, downloads) in downloads {
if downloads.theirs > 0 {
println!(
"{},{}",
date,
downloads.theirs as f64 / downloads.all as f64,
);
}
}
Ok(())
}
More examples
examples/user-dependencies.rs (line 26)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
fn main() -> db_dump::Result<()> {
// Map of crate id to the most recently published version of that crate.
let mut most_recent = Map::new();
let mut user_id = None;
let mut crates = 0;
let mut crate_owners = Vec::new();
let mut dependencies = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crates(|_row| crates += 1)
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| dependencies.push(row))
.versions(|row| match most_recent.entry(row.crate_id) {
Entry::Vacant(entry) => {
entry.insert(row);
}
Entry::Occupied(mut entry) => {
if row.created_at > entry.get().created_at {
entry.insert(row);
}
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids which are the most recently published of their crate.
let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));
// Set of version ids which depend directly on at least one crate by the
// user.
let mut dep_on_them = Set::new();
for dep in dependencies {
if their_crates.contains(&dep.crate_id) {
dep_on_them.insert(dep.version_id);
}
}
// Number of crates whose most recent version depends on at least one crate
// by the user.
let result = dep_on_them.intersection(&most_recent).count();
println!(
"{} / {} = {:.1}%",
result,
crates,
100.0 * result as f64 / crates as f64,
);
Ok(())
}
examples/top-crates.rs (line 24)
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/// Prints the (at most) `N` crates with the most reverse dependencies, one
/// `name,count` line each, counting only dependencies declared by the most
/// recent version of each dependent crate.
fn main() -> db_dump::Result<()> {
    // Map of crate id to the most recently published version of that crate.
    let mut most_recent = Map::new();
    let mut crates = Set::new();
    // NOTE(review): these rows are collected but never read below.
    let mut crate_owners = Vec::new();
    let mut dependencies = Vec::new();
    db_dump::Loader::new()
        .crates(|row| {
            crates.insert(row);
        })
        .crate_owners(|row| crate_owners.push(row))
        .dependencies(|row| dependencies.push(row))
        .versions(|row| match most_recent.entry(row.crate_id) {
            Entry::Vacant(entry) => {
                entry.insert(row);
            }
            Entry::Occupied(mut entry) => {
                if row.created_at > entry.get().created_at {
                    entry.insert(row);
                }
            }
        })
        .load("./db-dump.tar.gz")?;

    // Set of version ids which are the most recently published of their crate.
    let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));

    // Set of (version id, dependency crate id) pairs to avoid double-counting
    // cases where a crate has both a normal dependency and dev-dependency or
    // build-dependency on the same dependency crate.
    let mut unique_dependency_edges = Set::<(VersionId, CrateId)>::new();

    // Map of crate id to how many other crates' most recent version depends on
    // that crate.
    let mut count = Map::<CrateId, usize>::new();
    for dep in dependencies {
        if most_recent.contains(&dep.version_id)
            && unique_dependency_edges.insert((dep.version_id, dep.crate_id))
        {
            *count.entry(dep.crate_id).or_default() += 1;
        }
    }

    // Quickselect and sort the top N crates by reverse dependency count.
    let mut sort = Vec::from_iter(count);
    let sort_by_count = |&(_crate, count): &_| Reverse(count);
    // Clamp to the number of entries actually available: selecting index
    // `N - 1` or slicing `..N` would panic when fewer than `N` crates have any
    // reverse dependency, and `N - 1` would underflow for `N == 0`.
    let top = usize::min(N, sort.len());
    if top > 0 {
        sort.select_nth_unstable_by_key(top - 1, sort_by_count);
        sort[..top].sort_unstable_by_key(sort_by_count);
    }
    for (id, count) in &sort[..top] {
        let crate_name = &crates
            .get(id)
            .expect("dependency refers to a crate id missing from the crates table")
            .name;
        println!("{},{}", crate_name, count);
    }
    Ok(())
}
examples/user-dependencies-graph.rs (line 21)
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut dependencies = Map::new();
let mut versions = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| {
dependencies
.entry(row.version_id)
.or_insert_with(Vec::new)
.push(row);
})
.versions(|row| {
if !row.yanked {
versions.push(row);
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
let mut total_deps = 0usize;
let mut their_deps = 0usize;
let mut last_printed_ratio = 0.0..=0.0;
let mut latest_version = Map::new();
versions.sort_by_key(|v| v.created_at);
for version in versions {
let no_deps = Vec::new();
if let Some(prev) = latest_version.insert(version.crate_id, version.id) {
for dep in dependencies.get(&prev).unwrap_or(&no_deps) {
total_deps -= 1;
their_deps -= their_crates.contains(&dep.crate_id) as usize;
}
}
for dep in dependencies.get(&version.id).unwrap_or(&no_deps) {
total_deps += 1;
their_deps += their_crates.contains(&dep.crate_id) as usize;
}
if total_deps != 0 {
let ratio = their_deps as f64 / total_deps as f64;
if !last_printed_ratio.contains(&ratio) {
println!("{},{:.3}", version.created_at.naive_utc(), ratio * 100.0);
last_printed_ratio = ratio * 0.99999..=ratio * 1.00001;
}
}
}
eprintln!(
"{} / {} ({:.02}%)",
their_deps,
total_deps,
(their_deps as f64 / total_deps as f64) * 100.0,
);
Ok(())
}
pub fn crates(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
Examples found in repository?
examples/crate-downloads.rs (lines 16-20)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
fn main() -> db_dump::Result<()> {
let mut crate_id = None;
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.crates(|row| {
if row.name == CRATE {
crate_id = Some(row.id);
}
})
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// Crate id of the crate we care about.
let crate_id = crate_id.expect("no such crate");
// Set of all version ids corresponding to that crate.
let mut version_ids = Set::new();
for version in versions {
if version.crate_id == crate_id {
version_ids.insert(version.id);
}
}
// Add up downloads across all version of the crate by day.
let mut downloads = Map::<Date<Utc>, u64>::new();
for stat in version_downloads {
if version_ids.contains(&stat.version_id) {
*downloads.entry(stat.date).or_default() += stat.downloads;
}
}
for (date, count) in downloads {
println!("{},{}", date, count);
}
Ok(())
}
More examples
examples/user-dependencies.rs (line 25)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
fn main() -> db_dump::Result<()> {
// Map of crate id to the most recently published version of that crate.
let mut most_recent = Map::new();
let mut user_id = None;
let mut crates = 0;
let mut crate_owners = Vec::new();
let mut dependencies = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crates(|_row| crates += 1)
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| dependencies.push(row))
.versions(|row| match most_recent.entry(row.crate_id) {
Entry::Vacant(entry) => {
entry.insert(row);
}
Entry::Occupied(mut entry) => {
if row.created_at > entry.get().created_at {
entry.insert(row);
}
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids which are the most recently published of their crate.
let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));
// Set of version ids which depend directly on at least one crate by the
// user.
let mut dep_on_them = Set::new();
for dep in dependencies {
if their_crates.contains(&dep.crate_id) {
dep_on_them.insert(dep.version_id);
}
}
// Number of crates whose most recent version depends on at least one crate
// by the user.
let result = dep_on_them.intersection(&most_recent).count();
println!(
"{} / {} = {:.1}%",
result,
crates,
100.0 * result as f64 / crates as f64,
);
Ok(())
}
examples/top-crates.rs (lines 21-23)
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/// Prints the (at most) `N` crates with the most reverse dependencies, one
/// `name,count` line each, counting only dependencies declared by the most
/// recent version of each dependent crate.
fn main() -> db_dump::Result<()> {
    // Map of crate id to the most recently published version of that crate.
    let mut most_recent = Map::new();
    let mut crates = Set::new();
    // NOTE(review): these rows are collected but never read below.
    let mut crate_owners = Vec::new();
    let mut dependencies = Vec::new();
    db_dump::Loader::new()
        .crates(|row| {
            crates.insert(row);
        })
        .crate_owners(|row| crate_owners.push(row))
        .dependencies(|row| dependencies.push(row))
        .versions(|row| match most_recent.entry(row.crate_id) {
            Entry::Vacant(entry) => {
                entry.insert(row);
            }
            Entry::Occupied(mut entry) => {
                if row.created_at > entry.get().created_at {
                    entry.insert(row);
                }
            }
        })
        .load("./db-dump.tar.gz")?;

    // Set of version ids which are the most recently published of their crate.
    let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));

    // Set of (version id, dependency crate id) pairs to avoid double-counting
    // cases where a crate has both a normal dependency and dev-dependency or
    // build-dependency on the same dependency crate.
    let mut unique_dependency_edges = Set::<(VersionId, CrateId)>::new();

    // Map of crate id to how many other crates' most recent version depends on
    // that crate.
    let mut count = Map::<CrateId, usize>::new();
    for dep in dependencies {
        if most_recent.contains(&dep.version_id)
            && unique_dependency_edges.insert((dep.version_id, dep.crate_id))
        {
            *count.entry(dep.crate_id).or_default() += 1;
        }
    }

    // Quickselect and sort the top N crates by reverse dependency count.
    let mut sort = Vec::from_iter(count);
    let sort_by_count = |&(_crate, count): &_| Reverse(count);
    // Clamp to the number of entries actually available: selecting index
    // `N - 1` or slicing `..N` would panic when fewer than `N` crates have any
    // reverse dependency, and `N - 1` would underflow for `N == 0`.
    let top = usize::min(N, sort.len());
    if top > 0 {
        sort.select_nth_unstable_by_key(top - 1, sort_by_count);
        sort[..top].sort_unstable_by_key(sort_by_count);
    }
    for (id, count) in &sort[..top] {
        let crate_name = &crates
            .get(id)
            .expect("dependency refers to a crate id missing from the crates table")
            .name;
        println!("{},{}", crate_name, count);
    }
    Ok(())
}
examples/industry-coefficient.rs (lines 25-27)
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
/// Compares mid-week and weekend downloads per crate over the six weeks
/// before the newest date in the dump, and prints each qualifying crate's
/// weekday/weekend ratio relative to the ratio across all crates, highest
/// first.
fn main() -> db_dump::Result<()> {
    let mut crate_names: Map<CrateId, String> = Map::new();
    let mut crate_of_version: Map<VersionId, CrateId> = Map::new();
    let mut stats = Vec::new();

    db_dump::Loader::new()
        .crates(|row| {
            crate_names.insert(row.id, row.name);
        })
        .versions(|row| {
            crate_of_version.insert(row.id, row.crate_id);
        })
        .version_downloads(|row| stats.push(row))
        .load("./db-dump.tar.gz")?;

    let max_date = stats.iter().map(|row| row.date).max().unwrap();
    let start_date = max_date - TimeDelta::try_weeks(6).unwrap();

    // Add up downloads by crate, split into mid-week and weekend buckets.
    let mut per_crate: Map<CrateId, Downloads> = Map::new();
    for row in stats {
        // Keep only rows inside the window. The largest date in the db-dump
        // is deliberately cut out, because its data is partial.
        if row.date < start_date || row.date >= max_date {
            continue;
        }
        let crate_id = crate_of_version[&row.version_id];
        let tally = per_crate.entry(crate_id).or_default();
        match row.date.weekday() {
            Weekday::Tue | Weekday::Wed | Weekday::Thu => tally.weekday += row.downloads,
            Weekday::Sat | Weekday::Sun => tally.weekend += row.downloads,
            // Disregard these to reduce some boundary effect from
            // downloaders not being perfectly aligned with UTC.
            Weekday::Mon | Weekday::Fri => {}
        }
    }

    let mut ranked = Vec::new();
    let mut total = Downloads::default();
    for (crate_id, tally) in per_crate {
        total.weekday += tally.weekday;
        total.weekend += tally.weekend;
        let crate_name = &crate_names[&crate_id];
        // Rank a crate only if it has weekend downloads to divide by and
        // either meets the download cutoff or is "cxx".
        if tally.weekend > 0
            && (tally.weekday + tally.weekend >= DOWNLOADS_CUTOFF || crate_name == "cxx")
        {
            let coefficient = tally.weekday as f64 / tally.weekend as f64;
            ranked.push((crate_name, coefficient));
        }
    }

    // Ratio across all crates, used as the baseline for every printed value.
    let mean = total.weekday as f64 / total.weekend as f64;
    ranked.sort_by(|lhs, rhs| rhs.1.partial_cmp(&lhs.1).unwrap());
    for (crate_name, coefficient) in ranked {
        println!("{:>36} {:+.4}", crate_name, coefficient - mean);
    }
    Ok(())
}
pub fn crates_categories(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn crates_keywords(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn default_versions(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn dependencies(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
Examples found in repository?
examples/user-dependencies.rs (line 27)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
fn main() -> db_dump::Result<()> {
// Map of crate id to the most recently published version of that crate.
let mut most_recent = Map::new();
let mut user_id = None;
let mut crates = 0;
let mut crate_owners = Vec::new();
let mut dependencies = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crates(|_row| crates += 1)
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| dependencies.push(row))
.versions(|row| match most_recent.entry(row.crate_id) {
Entry::Vacant(entry) => {
entry.insert(row);
}
Entry::Occupied(mut entry) => {
if row.created_at > entry.get().created_at {
entry.insert(row);
}
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids which are the most recently published of their crate.
let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));
// Set of version ids which depend directly on at least one crate by the
// user.
let mut dep_on_them = Set::new();
for dep in dependencies {
if their_crates.contains(&dep.crate_id) {
dep_on_them.insert(dep.version_id);
}
}
// Number of crates whose most recent version depends on at least one crate
// by the user.
let result = dep_on_them.intersection(&most_recent).count();
println!(
"{} / {} = {:.1}%",
result,
crates,
100.0 * result as f64 / crates as f64,
);
Ok(())
}
More examples
examples/top-crates.rs (line 25)
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/// Prints the (at most) `N` crates with the most reverse dependencies, one
/// `name,count` line each, counting only dependencies declared by the most
/// recent version of each dependent crate.
fn main() -> db_dump::Result<()> {
    // Map of crate id to the most recently published version of that crate.
    let mut most_recent = Map::new();
    let mut crates = Set::new();
    // NOTE(review): these rows are collected but never read below.
    let mut crate_owners = Vec::new();
    let mut dependencies = Vec::new();
    db_dump::Loader::new()
        .crates(|row| {
            crates.insert(row);
        })
        .crate_owners(|row| crate_owners.push(row))
        .dependencies(|row| dependencies.push(row))
        .versions(|row| match most_recent.entry(row.crate_id) {
            Entry::Vacant(entry) => {
                entry.insert(row);
            }
            Entry::Occupied(mut entry) => {
                if row.created_at > entry.get().created_at {
                    entry.insert(row);
                }
            }
        })
        .load("./db-dump.tar.gz")?;

    // Set of version ids which are the most recently published of their crate.
    let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));

    // Set of (version id, dependency crate id) pairs to avoid double-counting
    // cases where a crate has both a normal dependency and dev-dependency or
    // build-dependency on the same dependency crate.
    let mut unique_dependency_edges = Set::<(VersionId, CrateId)>::new();

    // Map of crate id to how many other crates' most recent version depends on
    // that crate.
    let mut count = Map::<CrateId, usize>::new();
    for dep in dependencies {
        if most_recent.contains(&dep.version_id)
            && unique_dependency_edges.insert((dep.version_id, dep.crate_id))
        {
            *count.entry(dep.crate_id).or_default() += 1;
        }
    }

    // Quickselect and sort the top N crates by reverse dependency count.
    let mut sort = Vec::from_iter(count);
    let sort_by_count = |&(_crate, count): &_| Reverse(count);
    // Clamp to the number of entries actually available: selecting index
    // `N - 1` or slicing `..N` would panic when fewer than `N` crates have any
    // reverse dependency, and `N - 1` would underflow for `N == 0`.
    let top = usize::min(N, sort.len());
    if top > 0 {
        sort.select_nth_unstable_by_key(top - 1, sort_by_count);
        sort[..top].sort_unstable_by_key(sort_by_count);
    }
    for (id, count) in &sort[..top] {
        let crate_name = &crates
            .get(id)
            .expect("dependency refers to a crate id missing from the crates table")
            .name;
        println!("{},{}", crate_name, count);
    }
    Ok(())
}
examples/user-dependencies-graph.rs (lines 22-27)
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut dependencies = Map::new();
let mut versions = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| {
dependencies
.entry(row.version_id)
.or_insert_with(Vec::new)
.push(row);
})
.versions(|row| {
if !row.yanked {
versions.push(row);
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
let mut total_deps = 0usize;
let mut their_deps = 0usize;
let mut last_printed_ratio = 0.0..=0.0;
let mut latest_version = Map::new();
versions.sort_by_key(|v| v.created_at);
for version in versions {
let no_deps = Vec::new();
if let Some(prev) = latest_version.insert(version.crate_id, version.id) {
for dep in dependencies.get(&prev).unwrap_or(&no_deps) {
total_deps -= 1;
their_deps -= their_crates.contains(&dep.crate_id) as usize;
}
}
for dep in dependencies.get(&version.id).unwrap_or(&no_deps) {
total_deps += 1;
their_deps += their_crates.contains(&dep.crate_id) as usize;
}
if total_deps != 0 {
let ratio = their_deps as f64 / total_deps as f64;
if !last_printed_ratio.contains(&ratio) {
println!("{},{:.3}", version.created_at.naive_utc(), ratio * 100.0);
last_printed_ratio = ratio * 0.99999..=ratio * 1.00001;
}
}
}
eprintln!(
"{} / {} ({:.02}%)",
their_deps,
total_deps,
(their_deps as f64 / total_deps as f64) * 100.0,
);
Ok(())
}
pub fn keywords(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn metadata(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn reserved_crate_names(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn teams(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn users(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
Examples found in repository?
examples/user-downloads.rs (lines 24-28)
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids of all of those crates.
let mut their_versions = Set::new();
for version in versions {
if their_crates.contains(&version.crate_id) {
their_versions.insert(version.id);
}
}
// Add up downloads across that user's crates, as well as total downloads of
// all crates.
let mut downloads = Map::<Date<Utc>, Downloads>::new();
for stat in version_downloads {
let entry = downloads.entry(stat.date).or_default();
entry.all += stat.downloads;
if their_versions.contains(&stat.version_id) {
entry.theirs += stat.downloads;
}
}
// Print user's downloads as a fraction of total crates.io downloads by day.
for (date, downloads) in downloads {
if downloads.theirs > 0 {
println!(
"{},{}",
date,
downloads.theirs as f64 / downloads.all as f64,
);
}
}
Ok(())
}
More examples
examples/user-dependencies.rs (lines 20-24)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
fn main() -> db_dump::Result<()> {
// Map of crate id to the most recently published version of that crate.
let mut most_recent = Map::new();
let mut user_id = None;
let mut crates = 0;
let mut crate_owners = Vec::new();
let mut dependencies = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crates(|_row| crates += 1)
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| dependencies.push(row))
.versions(|row| match most_recent.entry(row.crate_id) {
Entry::Vacant(entry) => {
entry.insert(row);
}
Entry::Occupied(mut entry) => {
if row.created_at > entry.get().created_at {
entry.insert(row);
}
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids which are the most recently published of their crate.
let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));
// Set of version ids which depend directly on at least one crate by the
// user.
let mut dep_on_them = Set::new();
for dep in dependencies {
if their_crates.contains(&dep.crate_id) {
dep_on_them.insert(dep.version_id);
}
}
// Number of crates whose most recent version depends on at least one crate
// by the user.
let result = dep_on_them.intersection(&most_recent).count();
println!(
"{} / {} = {:.1}%",
result,
crates,
100.0 * result as f64 / crates as f64,
);
Ok(())
}
examples/user-dependencies-graph.rs (lines 16-20)
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut dependencies = Map::new();
let mut versions = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| {
dependencies
.entry(row.version_id)
.or_insert_with(Vec::new)
.push(row);
})
.versions(|row| {
if !row.yanked {
versions.push(row);
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
let mut total_deps = 0usize;
let mut their_deps = 0usize;
let mut last_printed_ratio = 0.0..=0.0;
let mut latest_version = Map::new();
versions.sort_by_key(|v| v.created_at);
for version in versions {
let no_deps = Vec::new();
if let Some(prev) = latest_version.insert(version.crate_id, version.id) {
for dep in dependencies.get(&prev).unwrap_or(&no_deps) {
total_deps -= 1;
their_deps -= their_crates.contains(&dep.crate_id) as usize;
}
}
for dep in dependencies.get(&version.id).unwrap_or(&no_deps) {
total_deps += 1;
their_deps += their_crates.contains(&dep.crate_id) as usize;
}
if total_deps != 0 {
let ratio = their_deps as f64 / total_deps as f64;
if !last_printed_ratio.contains(&ratio) {
println!("{},{:.3}", version.created_at.naive_utc(), ratio * 100.0);
last_printed_ratio = ratio * 0.99999..=ratio * 1.00001;
}
}
}
eprintln!(
"{} / {} ({:.02}%)",
their_deps,
total_deps,
(their_deps as f64 / total_deps as f64) * 100.0,
);
Ok(())
}
sourcepub fn version_downloads(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn version_downloads(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
Examples found in repository?
examples/total-downloads.rs (lines 13-15)
10 11 12 13 14 15 16 17 18 19 20 21 22 23
/// Streams the `version_downloads` table and prints the total download count
/// per day, summed over every row, as `date,count` lines.
fn main() -> db_dump::Result<()> {
    // Running per-day totals; individual rows are not retained.
    let mut per_day: Map<Date<Utc>, u64> = Map::new();
    db_dump::Loader::new()
        .version_downloads(|stat| {
            let total = per_day.entry(stat.date).or_default();
            *total += stat.downloads;
        })
        .load("./db-dump.tar.gz")?;
    per_day
        .into_iter()
        .for_each(|(day, total)| println!("{},{}", day, total));
    Ok(())
}
More examples
examples/crate-downloads.rs (line 22)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
fn main() -> db_dump::Result<()> {
let mut crate_id = None;
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.crates(|row| {
if row.name == CRATE {
crate_id = Some(row.id);
}
})
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// Crate id of the crate we care about.
let crate_id = crate_id.expect("no such crate");
// Set of all version ids corresponding to that crate.
let mut version_ids = Set::new();
for version in versions {
if version.crate_id == crate_id {
version_ids.insert(version.id);
}
}
// Add up downloads across all version of the crate by day.
let mut downloads = Map::<Date<Utc>, u64>::new();
for stat in version_downloads {
if version_ids.contains(&stat.version_id) {
*downloads.entry(stat.date).or_default() += stat.downloads;
}
}
for (date, count) in downloads {
println!("{},{}", date, count);
}
Ok(())
}
examples/user-downloads.rs (line 31)
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids of all of those crates.
let mut their_versions = Set::new();
for version in versions {
if their_crates.contains(&version.crate_id) {
their_versions.insert(version.id);
}
}
// Add up downloads across that user's crates, as well as total downloads of
// all crates.
let mut downloads = Map::<Date<Utc>, Downloads>::new();
for stat in version_downloads {
let entry = downloads.entry(stat.date).or_default();
entry.all += stat.downloads;
if their_versions.contains(&stat.version_id) {
entry.theirs += stat.downloads;
}
}
// Print user's downloads as a fraction of total crates.io downloads by day.
for (date, downloads) in downloads {
if downloads.theirs > 0 {
println!(
"{},{}",
date,
downloads.theirs as f64 / downloads.all as f64,
);
}
}
Ok(())
}
examples/industry-coefficient.rs (line 31)
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
/// Ranks crates by their midweek-to-weekend download ratio over the six weeks
/// preceding the newest date in the dump, printing each crate's ratio as an
/// offset from the ratio computed over all crates combined.
fn main() -> db_dump::Result<()> {
    let mut crates: Map<CrateId, String> = Map::new();
    let mut versions: Map<VersionId, CrateId> = Map::new();
    let mut stats = Vec::new();
    db_dump::Loader::new()
        .crates(|krate| {
            crates.insert(krate.id, krate.name);
        })
        .versions(|version| {
            versions.insert(version.id, version.crate_id);
        })
        .version_downloads(|stat| stats.push(stat))
        .load("./db-dump.tar.gz")?;

    // Panics if the dump contains no download rows at all.
    let newest = stats.iter().map(|stat| stat.date).max().unwrap();
    let window_start = newest - TimeDelta::try_weeks(6).unwrap();

    // Downloads per crate, split into midweek and weekend buckets.
    let mut by_crate: Map<CrateId, Downloads> = Map::new();
    for stat in stats {
        // The newest date is deliberately excluded because its data is
        // partial; anything older than the window is ignored as well.
        if stat.date < window_start || stat.date >= newest {
            continue;
        }
        let owner = versions[&stat.version_id];
        let tally = by_crate.entry(owner).or_default();
        match stat.date.weekday() {
            Weekday::Sat | Weekday::Sun => tally.weekend += stat.downloads,
            Weekday::Tue | Weekday::Wed | Weekday::Thu => tally.weekday += stat.downloads,
            // Skipped to reduce boundary effects from downloaders that are
            // not perfectly aligned with UTC.
            Weekday::Mon | Weekday::Fri => {}
        }
    }

    let mut ranked = Vec::new();
    let mut total = Downloads::default();
    for (crate_id, tally) in by_crate {
        total.weekday += tally.weekday;
        total.weekend += tally.weekend;
        let crate_name = &crates[&crate_id];
        // Crates need weekend traffic (the ratio's denominator) and either
        // enough volume or the special-cased name.
        let popular = tally.weekday + tally.weekend >= DOWNLOADS_CUTOFF;
        if tally.weekend > 0 && (popular || crate_name == "cxx") {
            ranked.push((crate_name, tally.weekday as f64 / tally.weekend as f64));
        }
    }

    // Ratio over all crates combined, used as the baseline.
    let mean = total.weekday as f64 / total.weekend as f64;
    // Highest coefficient first.
    ranked.sort_by(|lhs, rhs| rhs.1.partial_cmp(&lhs.1).unwrap());
    for (crate_name, coefficient) in ranked {
        println!("{:>36} {:+.4}", crate_name, coefficient - mean);
    }
    Ok(())
}
sourcepub fn versions(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
pub fn versions(&mut self, f: impl FnMut(Row) + 'a) -> &mut Self
Examples found in repository?
examples/crate-downloads.rs (line 21)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
fn main() -> db_dump::Result<()> {
let mut crate_id = None;
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.crates(|row| {
if row.name == CRATE {
crate_id = Some(row.id);
}
})
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// Crate id of the crate we care about.
let crate_id = crate_id.expect("no such crate");
// Set of all version ids corresponding to that crate.
let mut version_ids = Set::new();
for version in versions {
if version.crate_id == crate_id {
version_ids.insert(version.id);
}
}
// Add up downloads across all version of the crate by day.
let mut downloads = Map::<Date<Utc>, u64>::new();
for stat in version_downloads {
if version_ids.contains(&stat.version_id) {
*downloads.entry(stat.date).or_default() += stat.downloads;
}
}
for (date, count) in downloads {
println!("{},{}", date, count);
}
Ok(())
}
More examples
examples/user-downloads.rs (line 30)
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids of all of those crates.
let mut their_versions = Set::new();
for version in versions {
if their_crates.contains(&version.crate_id) {
their_versions.insert(version.id);
}
}
// Add up downloads across that user's crates, as well as total downloads of
// all crates.
let mut downloads = Map::<Date<Utc>, Downloads>::new();
for stat in version_downloads {
let entry = downloads.entry(stat.date).or_default();
entry.all += stat.downloads;
if their_versions.contains(&stat.version_id) {
entry.theirs += stat.downloads;
}
}
// Print user's downloads as a fraction of total crates.io downloads by day.
for (date, downloads) in downloads {
if downloads.theirs > 0 {
println!(
"{},{}",
date,
downloads.theirs as f64 / downloads.all as f64,
);
}
}
Ok(())
}
examples/user-dependencies.rs (lines 28-37)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
fn main() -> db_dump::Result<()> {
// Map of crate id to the most recently published version of that crate.
let mut most_recent = Map::new();
let mut user_id = None;
let mut crates = 0;
let mut crate_owners = Vec::new();
let mut dependencies = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crates(|_row| crates += 1)
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| dependencies.push(row))
.versions(|row| match most_recent.entry(row.crate_id) {
Entry::Vacant(entry) => {
entry.insert(row);
}
Entry::Occupied(mut entry) => {
if row.created_at > entry.get().created_at {
entry.insert(row);
}
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids which are the most recently published of their crate.
let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));
// Set of version ids which depend directly on at least one crate by the
// user.
let mut dep_on_them = Set::new();
for dep in dependencies {
if their_crates.contains(&dep.crate_id) {
dep_on_them.insert(dep.version_id);
}
}
// Number of crates whose most recent version depends on at least one crate
// by the user.
let result = dep_on_them.intersection(&most_recent).count();
println!(
"{} / {} = {:.1}%",
result,
crates,
100.0 * result as f64 / crates as f64,
);
Ok(())
}
examples/top-crates.rs (lines 26-35)
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/// Prints up to `N` crates with the most reverse dependencies as `name,count`
/// lines, most depended-upon first. Only dependency rows declared by each
/// crate's most recently published version are counted.
fn main() -> db_dump::Result<()> {
    // Map of crate id to the most recently published version of that crate.
    let mut most_recent = Map::new();
    let mut crates = Set::new();
    let mut dependencies = Vec::new();
    // The crate_owners table is not needed for this report, so no callback is
    // registered for it and its rows are never buffered.
    db_dump::Loader::new()
        .crates(|row| {
            crates.insert(row);
        })
        .dependencies(|row| dependencies.push(row))
        .versions(|row| match most_recent.entry(row.crate_id) {
            Entry::Vacant(entry) => {
                entry.insert(row);
            }
            Entry::Occupied(mut entry) => {
                if row.created_at > entry.get().created_at {
                    entry.insert(row);
                }
            }
        })
        .load("./db-dump.tar.gz")?;

    // Set of version ids which are the most recently published of their crate.
    let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));

    // Set of (version id, dependency crate id) pairs to avoid double-counting
    // cases where a crate has both a normal dependency and dev-dependency or
    // build-dependency on the same dependency crate.
    let mut unique_dependency_edges = Set::<(VersionId, CrateId)>::new();

    // Map of crate id to how many other crates' most recent version depends on
    // that crate.
    let mut count = Map::<CrateId, usize>::new();
    for dep in dependencies {
        if most_recent.contains(&dep.version_id)
            && unique_dependency_edges.insert((dep.version_id, dep.crate_id))
        {
            *count.entry(dep.crate_id).or_default() += 1;
        }
    }

    // Quickselect and sort the top crates by reverse dependency count.
    let mut sort = Vec::from_iter(count);
    let sort_by_count = |&(_crate, count): &_| Reverse(count);
    // Clamp to the number of crates that actually have dependents: both
    // `select_nth_unstable_by_key` and the `[..top]` slice panic when asked
    // for more elements than exist.
    let top = N.min(sort.len());
    if top > 0 {
        sort.select_nth_unstable_by_key(top - 1, sort_by_count);
        sort[..top].sort_unstable_by_key(sort_by_count);
    }

    for (id, count) in sort.iter().take(top) {
        // Panics if a dependency row names a crate id absent from `crates`.
        let crate_name = &crates.get(id).unwrap().name;
        println!("{},{}", crate_name, count);
    }
    Ok(())
}
examples/user-dependencies-graph.rs (lines 28-32)
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut dependencies = Map::new();
let mut versions = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| {
dependencies
.entry(row.version_id)
.or_insert_with(Vec::new)
.push(row);
})
.versions(|row| {
if !row.yanked {
versions.push(row);
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
let mut total_deps = 0usize;
let mut their_deps = 0usize;
let mut last_printed_ratio = 0.0..=0.0;
let mut latest_version = Map::new();
versions.sort_by_key(|v| v.created_at);
for version in versions {
let no_deps = Vec::new();
if let Some(prev) = latest_version.insert(version.crate_id, version.id) {
for dep in dependencies.get(&prev).unwrap_or(&no_deps) {
total_deps -= 1;
their_deps -= their_crates.contains(&dep.crate_id) as usize;
}
}
for dep in dependencies.get(&version.id).unwrap_or(&no_deps) {
total_deps += 1;
their_deps += their_crates.contains(&dep.crate_id) as usize;
}
if total_deps != 0 {
let ratio = their_deps as f64 / total_deps as f64;
if !last_printed_ratio.contains(&ratio) {
println!("{},{:.3}", version.created_at.naive_utc(), ratio * 100.0);
last_printed_ratio = ratio * 0.99999..=ratio * 1.00001;
}
}
}
eprintln!(
"{} / {} ({:.02}%)",
their_deps,
total_deps,
(their_deps as f64 / total_deps as f64) * 100.0,
);
Ok(())
}
examples/industry-coefficient.rs (lines 28-30)
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
/// Ranks crates by their midweek-to-weekend download ratio over the six weeks
/// preceding the newest date in the dump, printing each crate's ratio as an
/// offset from the ratio computed over all crates combined.
fn main() -> db_dump::Result<()> {
    let mut crates: Map<CrateId, String> = Map::new();
    let mut versions: Map<VersionId, CrateId> = Map::new();
    let mut stats = Vec::new();
    db_dump::Loader::new()
        .crates(|krate| {
            crates.insert(krate.id, krate.name);
        })
        .versions(|version| {
            versions.insert(version.id, version.crate_id);
        })
        .version_downloads(|stat| stats.push(stat))
        .load("./db-dump.tar.gz")?;

    // Panics if the dump contains no download rows at all.
    let newest = stats.iter().map(|stat| stat.date).max().unwrap();
    let window_start = newest - TimeDelta::try_weeks(6).unwrap();

    // Downloads per crate, split into midweek and weekend buckets.
    let mut by_crate: Map<CrateId, Downloads> = Map::new();
    for stat in stats {
        // The newest date is deliberately excluded because its data is
        // partial; anything older than the window is ignored as well.
        if stat.date < window_start || stat.date >= newest {
            continue;
        }
        let owner = versions[&stat.version_id];
        let tally = by_crate.entry(owner).or_default();
        match stat.date.weekday() {
            Weekday::Sat | Weekday::Sun => tally.weekend += stat.downloads,
            Weekday::Tue | Weekday::Wed | Weekday::Thu => tally.weekday += stat.downloads,
            // Skipped to reduce boundary effects from downloaders that are
            // not perfectly aligned with UTC.
            Weekday::Mon | Weekday::Fri => {}
        }
    }

    let mut ranked = Vec::new();
    let mut total = Downloads::default();
    for (crate_id, tally) in by_crate {
        total.weekday += tally.weekday;
        total.weekend += tally.weekend;
        let crate_name = &crates[&crate_id];
        // Crates need weekend traffic (the ratio's denominator) and either
        // enough volume or the special-cased name.
        let popular = tally.weekday + tally.weekend >= DOWNLOADS_CUTOFF;
        if tally.weekend > 0 && (popular || crate_name == "cxx") {
            ranked.push((crate_name, tally.weekday as f64 / tally.weekend as f64));
        }
    }

    // Ratio over all crates combined, used as the baseline.
    let mean = total.weekday as f64 / total.weekend as f64;
    // Highest coefficient first.
    ranked.sort_by(|lhs, rhs| rhs.1.partial_cmp(&lhs.1).unwrap());
    for (crate_name, coefficient) in ranked {
        println!("{:>36} {:+.4}", crate_name, coefficient - mean);
    }
    Ok(())
}
sourcepub fn load(&mut self, path: impl AsRef<Path>) -> Result<()>
pub fn load(&mut self, path: impl AsRef<Path>) -> Result<()>
Examples found in repository?
examples/total-downloads.rs (line 16)
10 11 12 13 14 15 16 17 18 19 20 21 22 23
/// Streams the `version_downloads` table and prints the total download count
/// per day, summed over every row, as `date,count` lines.
fn main() -> db_dump::Result<()> {
    // Running per-day totals; individual rows are not retained.
    let mut per_day: Map<Date<Utc>, u64> = Map::new();
    db_dump::Loader::new()
        .version_downloads(|stat| {
            let total = per_day.entry(stat.date).or_default();
            *total += stat.downloads;
        })
        .load("./db-dump.tar.gz")?;
    per_day
        .into_iter()
        .for_each(|(day, total)| println!("{},{}", day, total));
    Ok(())
}
More examples
examples/crate-downloads.rs (line 23)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
fn main() -> db_dump::Result<()> {
let mut crate_id = None;
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.crates(|row| {
if row.name == CRATE {
crate_id = Some(row.id);
}
})
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// Crate id of the crate we care about.
let crate_id = crate_id.expect("no such crate");
// Set of all version ids corresponding to that crate.
let mut version_ids = Set::new();
for version in versions {
if version.crate_id == crate_id {
version_ids.insert(version.id);
}
}
// Add up downloads across all version of the crate by day.
let mut downloads = Map::<Date<Utc>, u64>::new();
for stat in version_downloads {
if version_ids.contains(&stat.version_id) {
*downloads.entry(stat.date).or_default() += stat.downloads;
}
}
for (date, count) in downloads {
println!("{},{}", date, count);
}
Ok(())
}
examples/user-downloads.rs (line 32)
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut versions = Vec::new();
let mut version_downloads = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.versions(|row| versions.push(row))
.version_downloads(|row| version_downloads.push(row))
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids of all of those crates.
let mut their_versions = Set::new();
for version in versions {
if their_crates.contains(&version.crate_id) {
their_versions.insert(version.id);
}
}
// Add up downloads across that user's crates, as well as total downloads of
// all crates.
let mut downloads = Map::<Date<Utc>, Downloads>::new();
for stat in version_downloads {
let entry = downloads.entry(stat.date).or_default();
entry.all += stat.downloads;
if their_versions.contains(&stat.version_id) {
entry.theirs += stat.downloads;
}
}
// Print user's downloads as a fraction of total crates.io downloads by day.
for (date, downloads) in downloads {
if downloads.theirs > 0 {
println!(
"{},{}",
date,
downloads.theirs as f64 / downloads.all as f64,
);
}
}
Ok(())
}
examples/user-dependencies.rs (line 38)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
fn main() -> db_dump::Result<()> {
// Map of crate id to the most recently published version of that crate.
let mut most_recent = Map::new();
let mut user_id = None;
let mut crates = 0;
let mut crate_owners = Vec::new();
let mut dependencies = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crates(|_row| crates += 1)
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| dependencies.push(row))
.versions(|row| match most_recent.entry(row.crate_id) {
Entry::Vacant(entry) => {
entry.insert(row);
}
Entry::Occupied(mut entry) => {
if row.created_at > entry.get().created_at {
entry.insert(row);
}
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
// Set of version ids which are the most recently published of their crate.
let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));
// Set of version ids which depend directly on at least one crate by the
// user.
let mut dep_on_them = Set::new();
for dep in dependencies {
if their_crates.contains(&dep.crate_id) {
dep_on_them.insert(dep.version_id);
}
}
// Number of crates whose most recent version depends on at least one crate
// by the user.
let result = dep_on_them.intersection(&most_recent).count();
println!(
"{} / {} = {:.1}%",
result,
crates,
100.0 * result as f64 / crates as f64,
);
Ok(())
}
examples/top-crates.rs (line 36)
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/// Prints up to `N` crates with the most reverse dependencies as `name,count`
/// lines, most depended-upon first. Only dependency rows declared by each
/// crate's most recently published version are counted.
fn main() -> db_dump::Result<()> {
    // Map of crate id to the most recently published version of that crate.
    let mut most_recent = Map::new();
    let mut crates = Set::new();
    let mut dependencies = Vec::new();
    // The crate_owners table is not needed for this report, so no callback is
    // registered for it and its rows are never buffered.
    db_dump::Loader::new()
        .crates(|row| {
            crates.insert(row);
        })
        .dependencies(|row| dependencies.push(row))
        .versions(|row| match most_recent.entry(row.crate_id) {
            Entry::Vacant(entry) => {
                entry.insert(row);
            }
            Entry::Occupied(mut entry) => {
                if row.created_at > entry.get().created_at {
                    entry.insert(row);
                }
            }
        })
        .load("./db-dump.tar.gz")?;

    // Set of version ids which are the most recently published of their crate.
    let most_recent = Set::from_iter(most_recent.values().map(|version| version.id));

    // Set of (version id, dependency crate id) pairs to avoid double-counting
    // cases where a crate has both a normal dependency and dev-dependency or
    // build-dependency on the same dependency crate.
    let mut unique_dependency_edges = Set::<(VersionId, CrateId)>::new();

    // Map of crate id to how many other crates' most recent version depends on
    // that crate.
    let mut count = Map::<CrateId, usize>::new();
    for dep in dependencies {
        if most_recent.contains(&dep.version_id)
            && unique_dependency_edges.insert((dep.version_id, dep.crate_id))
        {
            *count.entry(dep.crate_id).or_default() += 1;
        }
    }

    // Quickselect and sort the top crates by reverse dependency count.
    let mut sort = Vec::from_iter(count);
    let sort_by_count = |&(_crate, count): &_| Reverse(count);
    // Clamp to the number of crates that actually have dependents: both
    // `select_nth_unstable_by_key` and the `[..top]` slice panic when asked
    // for more elements than exist.
    let top = N.min(sort.len());
    if top > 0 {
        sort.select_nth_unstable_by_key(top - 1, sort_by_count);
        sort[..top].sort_unstable_by_key(sort_by_count);
    }

    for (id, count) in sort.iter().take(top) {
        // Panics if a dependency row names a crate id absent from `crates`.
        let crate_name = &crates.get(id).unwrap().name;
        println!("{},{}", crate_name, count);
    }
    Ok(())
}
examples/user-dependencies-graph.rs (line 33)
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
fn main() -> db_dump::Result<()> {
let mut user_id = None;
let mut crate_owners = Vec::new();
let mut dependencies = Map::new();
let mut versions = Vec::new();
db_dump::Loader::new()
.users(|row| {
if row.gh_login == USER {
user_id = Some(row.id);
}
})
.crate_owners(|row| crate_owners.push(row))
.dependencies(|row| {
dependencies
.entry(row.version_id)
.or_insert_with(Vec::new)
.push(row);
})
.versions(|row| {
if !row.yanked {
versions.push(row);
}
})
.load("./db-dump.tar.gz")?;
// User id of the crate author we care about.
let user_id = user_id.expect("no such user");
// Set of crate ids currently owned by that user.
let mut their_crates = Set::new();
for crate_owner in crate_owners {
if crate_owner.owner_id == user_id {
their_crates.insert(crate_owner.crate_id);
}
}
let mut total_deps = 0usize;
let mut their_deps = 0usize;
let mut last_printed_ratio = 0.0..=0.0;
let mut latest_version = Map::new();
versions.sort_by_key(|v| v.created_at);
for version in versions {
let no_deps = Vec::new();
if let Some(prev) = latest_version.insert(version.crate_id, version.id) {
for dep in dependencies.get(&prev).unwrap_or(&no_deps) {
total_deps -= 1;
their_deps -= their_crates.contains(&dep.crate_id) as usize;
}
}
for dep in dependencies.get(&version.id).unwrap_or(&no_deps) {
total_deps += 1;
their_deps += their_crates.contains(&dep.crate_id) as usize;
}
if total_deps != 0 {
let ratio = their_deps as f64 / total_deps as f64;
if !last_printed_ratio.contains(&ratio) {
println!("{},{:.3}", version.created_at.naive_utc(), ratio * 100.0);
last_printed_ratio = ratio * 0.99999..=ratio * 1.00001;
}
}
}
eprintln!(
"{} / {} ({:.02}%)",
their_deps,
total_deps,
(their_deps as f64 / total_deps as f64) * 100.0,
);
Ok(())
}
Additional examples can be found in:
Trait Implementations§
Auto Trait Implementations§
impl<'a> Freeze for Loader<'a>
impl<'a> !RefUnwindSafe for Loader<'a>
impl<'a> !Send for Loader<'a>
impl<'a> !Sync for Loader<'a>
impl<'a> Unpin for Loader<'a>
impl<'a> !UnwindSafe for Loader<'a>
Blanket Implementations§
source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more