use anyhow::Result;
use url::Url;
use std::path::PathBuf;
use serde::Deserialize;
use crate::git::Git;
/// Directory that repositories are cloned into when the caller does not
/// supply an output directory.
const REPO_DIR: &str = "mined";
/// A crate's repository paired with the location of its checkout on disk.
struct LocalGit {
/// Path of the local clone.
path: PathBuf,
/// CSV row this repository was obtained from.
info: RepoInfo,
}
/// One row of the crates CSV file, deserialized by column name.
///
/// The field names must match the header names checked in
/// `read_cratesio_csv`. Only `max_upload_size` is declared optional; every
/// other column is required to deserialize into its declared type.
// Only `downloads`, `name` and `repository` are read by the code in this
// file; the remaining fields exist so a full row deserializes, which is
// presumably why dead-code warnings are silenced here.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
struct RepoInfo {
/// Raw text of the `created_at` column (not parsed into a date type).
created_at: String,
/// Raw text of the `description` column.
description: String,
/// Raw text of the `documentation` column.
documentation: String,
/// Download count; used to order crates, most downloaded first.
downloads: u64,
/// Raw text of the `homepage` column.
homepage: String,
/// Numeric `id` column.
id: u64,
/// `max_upload_size` column, if a value is present.
max_upload_size: Option<u64>,
/// Crate name; used for subset filtering and as the checkout directory name.
name: String,
/// Raw text of the `readme` column.
readme: String,
/// Repository URL; probed over HTTP and passed to the git clone.
repository: String,
/// Raw text of the `updated_at` column (not parsed into a date type).
updated_at: String,
}
/// All rows read from the crates CSV file.
type CratesInfo = Vec<RepoInfo>;
/// A set of crate repositories that are available as local git checkouts.
///
/// Constructed through `CratesIO::init_local_repos`.
pub struct CratesIO {
/// Repositories that were found locally or cloned successfully.
repos: Vec<LocalGit>,
}
impl CratesIO {
/// Invokes `f` with the checkout path of every repository, in stored order.
///
/// # Errors
///
/// Stops at the first repository for which `f` returns an error and
/// propagates that error; later repositories are not visited.
pub fn apply<F>(&self, f: F) -> Result<()>
where
    F: Fn(&PathBuf) -> Result<()>,
{
    self.repos.iter().try_for_each(|local| f(&local.path))
}
/// Reads the crates CSV at `csv`, optionally restricts it to `names`, and
/// makes sure each selected repository is available as a local checkout.
///
/// * `csv` - path of the CSV file to read.
/// * `names` - when `Some`, only crates whose name is listed are kept.
/// * `username` - forwarded to `Git::init` as the SSH user name.
/// * `out_dir` - directory to clone into; `REPO_DIR` is used when `None`.
///
/// # Errors
///
/// Returns any error produced while reading the CSV or while preparing the
/// local repositories.
pub fn init_local_repos(
    csv: String,
    names: Option<Vec<String>>,
    username: String,
    out_dir: Option<String>,
) -> Result<CratesIO> {
    Self::init(csv, names)
        .and_then(|selected| Self::download_local(selected, &username, out_dir))
}
/// Loads the crates CSV and returns its rows ordered by download count,
/// most downloaded first.
///
/// When `subset_repos` is `Some`, only rows whose `name` appears in the list
/// are kept. Rows with equal download counts keep their relative order from
/// the CSV file.
///
/// # Errors
///
/// Propagates any error from `read_cratesio_csv`.
fn init(csv: String, subset_repos: Option<Vec<String>>) -> Result<CratesInfo> {
    let mut crates = Self::read_cratesio_csv(csv)?;
    if let Some(allowed) = subset_repos {
        // A set makes each membership test O(1) instead of scanning the
        // whole allow-list once per CSV row.
        let allowed: std::collections::HashSet<String> = allowed.into_iter().collect();
        crates.retain(|c| allowed.contains(&c.name));
    }
    // Sort after filtering so discarded rows are never sorted. `sort_by` is
    // stable and `retain` preserves order, so the result is identical to
    // sorting first and filtering afterwards.
    crates.sort_by(|a, b| b.downloads.cmp(&a.downloads));
    Ok(crates)
}
/// Ensures every crate in `crates` has a local checkout under `dir`
/// (`REPO_DIR` when `dir` is `None`) and collects the ones that do.
///
/// For each crate the repository URL is first probed with an HTTP GET. A
/// failed request or non-success status causes the crate to be skipped with
/// a message on stdout. Crates whose clone fails are also skipped, and the
/// failure reason is printed.
///
/// # Errors
///
/// Returns an error only if the HTTP client cannot be constructed;
/// per-repository failures are reported on stdout and do not abort the run.
fn download_local(crates: CratesInfo, ssh_username: &str, dir: Option<String>) -> Result<Self> {
    let mut git = Git::init(ssh_username);
    // Build the default string only when no directory was supplied.
    let dir = dir.unwrap_or_else(|| REPO_DIR.to_string());
    // One client for all probes, so connections can be reused instead of
    // constructing a fresh client for every repository.
    let http = reqwest::blocking::Client::builder().build()?;
    let git_exists = |info: &RepoInfo| -> bool {
        http.get(&info.repository)
            .send()
            .map(|response| response.status().is_success())
            .unwrap_or(false)
    };
    let mut repos = Vec::new();
    for info in crates {
        // NOTE(review): the probe runs even when the repository is already
        // cloned locally, so an unreachable remote hides an existing checkout.
        if !git_exists(&info) {
            println!("Ignoring stale repo [{}] for [{}]", &info.repository, &info.name);
            continue;
        }
        match Self::download(&info, &mut git, &dir) {
            Ok(path) => repos.push(LocalGit { path, info }),
            // Best effort: keep going, but say why the clone failed.
            Err(err) => println!("!! Git clone failed [{}]: {:#}", &info.repository, err),
        }
    }
    Ok(CratesIO { repos })
}
/// Returns `true` when `path` exists on disk and `Git::is_repo` accepts it.
fn already_local_git(path: &PathBuf) -> bool {
    if !path.exists() {
        return false;
    }
    Git::is_repo(path)
}
fn download(info: &RepoInfo, git: &mut Git, dir: &str) -> Result<PathBuf> {
let mut location = PathBuf::new();
location.push(dir);
location.push(info.name.clone());
if Self::already_local_git(&location) {
println!("Already local: [{}] at [{:?}]", info.repository, location);
return Ok(location);
}
println!("Cloning: [{}] to [{:?}]", info.repository, location);
let url = Url::parse(&info.repository)?;
git.clone(&url, &location)?;
assert!(Self::already_local_git(&location));
Ok(location)
}
fn read_cratesio_csv(path: String) -> Result<CratesInfo> {
let all_fields = vec![
"created_at",
"description",
"documentation",
"downloads",
"homepage",
"id",
"max_upload_size",
"name",
"readme",
"repository",
"updated_at",
];
let mut records = vec![];
let mut rdr = csv::Reader::from_path(path)?;
let headers = rdr.headers()?.clone();
let fields = csv::StringRecord::from(all_fields);
assert!(fields == headers);
for r in rdr.records() {
let row: RepoInfo = r?.deserialize(Some(&fields))?;
records.push(row)
}
Ok(records)
}
}