use std::env;
use std::error::Error;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use once_cell::sync::Lazy;
use regex::Regex;
/// Kinds of code-hosting platforms / forge software.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it. It can be serialized and deserialized
/// through serde.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub enum RepoPlatform {
    GitHub,
    GitLab,
    Gitea,
    Cgit,
    Forgejo,
    Fossil,
    Mercurial,
    Gogs,
}
/// Regular expressions recognising repository URLs on the supported hosts.
///
/// Every pattern has the same capture layout: group 1 is the host name,
/// group 2 the owner and group 3 the repository name. Anything after the
/// repository segment (trailing slash, sub-paths) is accepted and ignored.
///
/// The dots in the host names are escaped so that they only match a literal
/// `.`; an unescaped `.` would accept any character in that position
/// (e.g. `githubXcom`). Raw strings keep the backslashes readable.
const URL_REGEXES: [&str; 5] = [
    r"^https?://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(salsa\.debian\.org)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(bitbucket\.org)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(codeberg\.org)/([^/]+)/([^/]+)(/.*)?$",
];
/// A repository identified by the host it lives on, its owner and its name.
///
/// Two values compare equal when all three fields are equal.
#[derive(Debug, PartialEq)]
#[allow(dead_code)]
pub struct Repository {
    /// Host name of the hosting service, e.g. `github.com`.
    host: String,
    /// Owner (user or organisation) segment of the repository URL.
    owner: String,
    /// Repository name segment of the repository URL.
    repo: String,
}
#[allow(dead_code)]
impl Repository {
/// Builds a `Repository` from its three parts.
///
/// The values are copied into owned strings exactly as given; no
/// normalisation (such as lowercasing) is applied here.
pub fn new(host: &str, owner: &str, repo: &str) -> Self {
    let (host, owner, repo) = (host.to_owned(), owner.to_owned(), repo.to_owned());
    Self { host, owner, repo }
}
/// Parses a repository URL into a `Repository`.
///
/// The URL is tried against each pattern in `URL_REGEXES`, in order, and the
/// first pattern that matches wins. Host, owner and repository name are taken
/// from capture groups 1, 2 and 3 and converted to lowercase.
///
/// # Errors
///
/// Returns an error with the message `No match for repo in '<url>'` when no
/// pattern matches.
///
/// # Panics
///
/// Panics on first use if one of the patterns in `URL_REGEXES` is not a
/// valid regular expression.
pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
    // Compiled once on first use and shared by every later call.
    static COMPILED: Lazy<Vec<Regex>> = Lazy::new(|| {
        let mut compiled = Vec::with_capacity(URL_REGEXES.len());
        for pattern in URL_REGEXES.iter() {
            compiled.push(Regex::new(pattern).unwrap());
        }
        compiled
    });
    let matched = COMPILED.iter().find_map(|regex| regex.captures(url));
    match matched {
        Some(parts) => Ok(Self {
            host: parts[1].to_lowercase(),
            owner: parts[2].to_lowercase(),
            repo: parts[3].to_lowercase(),
        }),
        None => Err(format!("No match for repo in '{}'", &url).into()),
    }
}
pub fn url(&self) -> String {
format!("https://{}/{}/{}", self.host, self.owner, self.repo)
}
/// Returns the local directory of this repository below `root`:
/// `<root>/<host>/<owner>/<repo>`.
pub fn path(&self, root: &Path) -> PathBuf {
    let mut full = self.owner_path(root);
    full.push(&self.repo);
    full
}
/// Returns the local directory holding all repositories of this owner
/// below `root`: `<root>/<host>/<owner>`.
pub fn owner_path(&self, root: &Path) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.push(&self.host);
    dir.push(&self.owner);
    dir
}
/// Returns the owner part of the repository as a borrowed string.
pub fn get_owner(&self) -> &str {
    self.owner.as_str()
}
/// Returns `true` when the host is exactly `github.com`.
pub fn is_github(&self) -> bool {
    self.host == "github.com"
}
/// Returns `true` when the host is one of the hosts treated as GitLab:
/// `gitlab.com` or `salsa.debian.org`.
pub fn is_gitlab(&self) -> bool {
    matches!(self.host.as_str(), "gitlab.com" | "salsa.debian.org")
}
/// Returns `true` when the host is exactly `bitbucket.org`.
pub fn is_bitbucket(&self) -> bool {
    self.host.as_str() == "bitbucket.org"
}
/// Reports whether the local checkout below `root` has GitHub Actions workflows.
///
/// Returns `true` only when the repository is hosted on GitHub and its
/// `.github/workflows` directory contains at least one entry whose extension
/// is `yml` or `yaml`. A missing directory, an unreadable directory and
/// unreadable entries all count as "no workflow".
pub fn has_github_actions(&self, root: &Path) -> bool {
    if !self.is_github() {
        return false;
    }
    let github_dir = self.path(root).join(".github");
    if !github_dir.exists() {
        return false;
    }
    let workflows = github_dir.join("workflows");
    if !workflows.exists() {
        return false;
    }
    match workflows.read_dir() {
        Err(_) => false,
        Ok(listing) => listing.filter_map(Result::ok).any(|item| {
            let file = item.path();
            let extension = file.extension().and_then(|ext| ext.to_str());
            matches!(extension, Some("yml") | Some("yaml"))
        }),
    }
}
/// Reports whether the local checkout below `root` has a Dependabot configuration.
///
/// Returns `true` only for repositories hosted on GitHub whose checkout
/// contains `.github/dependabot.yml`.
pub fn has_dependabot(&self, root: &Path) -> bool {
    if !self.is_github() {
        return false;
    }
    let github_dir = self.path(root).join(".github");
    github_dir.exists() && github_dir.join("dependabot.yml").exists()
}
/// Reports whether the local checkout below `root` has a GitLab CI configuration.
///
/// Returns `true` only for repositories on a GitLab host (see `is_gitlab`)
/// whose checkout contains `.gitlab-ci.yml` at its top level.
pub fn has_gitlab_pipeline(&self, root: &Path) -> bool {
    self.is_gitlab() && self.path(root).join(".gitlab-ci.yml").exists()
}
/// Reports whether the local checkout below `root` has a Bitbucket Pipelines
/// configuration.
///
/// Returns `true` only for repositories hosted on Bitbucket whose checkout
/// contains `bitbucket-pipelines.yml` at its top level.
pub fn has_bitbucket_pipeline(&self, root: &Path) -> bool {
    self.is_bitbucket() && self.path(root).join("bitbucket-pipelines.yml").exists()
}
/// Reports whether the local checkout below `root` has a CircleCI configuration.
///
/// Returns `true` only for repositories hosted on GitHub whose checkout
/// contains a `.circleci` entry at its top level.
pub fn has_circle_ci(&self, root: &Path) -> bool {
    self.is_github() && self.path(root).join(".circleci").exists()
}
/// Reports whether the local checkout below `root` has a Cirrus CI configuration.
///
/// Returns `true` only for repositories hosted on GitHub whose checkout
/// contains, at its top level, either `.cirrus.yml` (the configuration file
/// Cirrus CI reads) or a `.cirrusci` entry. The latter is still accepted so
/// that checkouts detected through it keep being detected.
pub fn has_cirrus_ci(&self, root: &Path) -> bool {
    if !self.is_github() {
        return false;
    }
    let path = self.path(root);
    [".cirrus.yml", ".cirrusci"]
        .iter()
        .any(|name| path.join(name).exists())
}
/// Reports whether the local checkout below `root` has a Travis CI configuration.
///
/// Returns `true` only for repositories hosted on GitHub whose checkout
/// contains `.travis.yml` (the file name Travis CI documents) at its top
/// level. The `.travis.yaml` spelling is accepted as well so that checkouts
/// detected through it keep being detected.
pub fn has_travis(&self, root: &Path) -> bool {
    if !self.is_github() {
        return false;
    }
    let path = self.path(root);
    [".travis.yml", ".travis.yaml"]
        .iter()
        .any(|name| path.join(name).exists())
}
/// Reports whether the local checkout below `root` contains a `Jenkinsfile`
/// at its top level. The check does not depend on the hosting service.
pub fn has_jenkins(&self, root: &Path) -> bool {
    self.path(root).join("Jenkinsfile").exists()
}
/// Reports whether the local checkout below `root` contains an AppVeyor
/// configuration, i.e. `appveyor.yml` or `.appveyor.yml` at its top level.
/// The check does not depend on the hosting service.
pub fn has_appveyor(&self, root: &Path) -> bool {
    let checkout = self.path(root);
    ["appveyor.yml", ".appveyor.yml"]
        .iter()
        .any(|name| checkout.join(name).exists())
}
/// Clones the repository below `root`, or pulls it if it is already there.
///
/// The owner directory (`<root>/<host>/<owner>`) is created if needed. Then:
///
/// * if the repository directory does not exist, it is cloned into the owner
///   directory, passing `--depth=<depth>` to git when `depth` is given;
/// * if it exists and `clone` is `false`, `git pull` is run inside it;
/// * if it exists and `clone` is `true`, nothing is done.
///
/// The git helpers run in the process's current working directory, so this
/// method temporarily changes it and restores it before returning `Ok`.
/// The working directory is process-wide state, so concurrent callers (or
/// other threads relying on it) can interfere with each other.
///
/// # Errors
///
/// Returns an error when the current directory cannot be read or changed, or
/// when the owner directory cannot be created. Failures of the git commands
/// themselves are only logged by the helpers and do not produce an error.
pub fn update_repository(
    &self,
    root: &Path,
    clone: bool,
    depth: Option<usize>,
) -> Result<(), Box<dyn Error>> {
    let owner_path = self.owner_path(root);
    let current_dir = env::current_dir()?;
    log::info!(
        "Creating owner_path {:?} while current_dir is {:?}",
        &owner_path,
        &current_dir
    );
    fs::create_dir_all(&owner_path)?;
    let repo_path = self.path(root);
    if repo_path.exists() {
        if clone {
            log::info!("repo exist but we only clone now. Skipping.");
        } else {
            log::info!("repo exist; cd to {:?}", &repo_path);
            env::set_current_dir(&repo_path)?;
            self.git_pull();
        }
    } else {
        log::info!("new repo; cd to {:?}", &owner_path);
        env::set_current_dir(owner_path)?;
        self.git_clone(depth);
    }
    // Go back to where the caller was before the git command ran.
    env::set_current_dir(current_dir)?;
    Ok(())
}
fn git_pull(&self) {
if !self.check_url() {
log::error!("Repository URL is not reachable: {}", self.url());
return;
}
let current_dir = env::current_dir().unwrap();
log::info!("git pull in {current_dir:?}");
match Command::new("git").arg("pull").output() {
Ok(result) => {
if result.status.success() {
log::info!(
"git_pull exit code: '{}' in folder {:?}",
result.status,
current_dir
);
} else {
log::warn!(
"git_pull exit code: '{}' in folder {:?}",
result.status,
current_dir
);
}
}
Err(err) => {
log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
}
}
}
fn git_clone(&self, depth: Option<usize>) {
if !self.check_url() {
log::error!("Repository URL is not reachable: {}", self.url());
return;
}
let current_dir = env::current_dir().unwrap();
let url = self.url();
log::info!("git clone {url} in {current_dir:?}");
let mut cmd = Command::new("git");
cmd.arg("clone");
if let Some(depth) = depth {
cmd.arg(format!("--depth={depth}"));
}
match cmd.arg(self.url()).output() {
Ok(result) => {
if result.status.success() {
log::info!("git_clone exit code: '{}'", result.status);
} else {
log::warn!(
"git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
result.status,
url,
);
}
}
Err(err) => {
log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
}
}
}
/// Checks whether the repository URL can be fetched.
///
/// Sends an HTTP GET request to `self.url()` and returns `true` when the
/// request succeeds. When the request fails, the error is logged and `false`
/// is returned.
// NOTE(review): which HTTP status codes count as failures is decided by
// `ureq`'s `call()` — presumably 4xx/5xx; confirm for the version in use.
pub fn check_url(&self) -> bool {
    let url = self.url();
    if let Err(err) = ureq::get(&url).call() {
        log::error!("Error checking URL '{}': {}", url, err);
        return false;
    }
    true
}
}
#[cfg(test)]
mod tests {
use super::*;
// Exercises `Repository::from_url` together with the accessors built on the
// parsed value (`url`, `path`, `get_owner`, `is_github`, `is_gitlab`) for
// each supported host, plus the error returned for an unsupported URL.
#[test]
fn test_get_owner_and_repo() {
let root = Path::new("/tmp");
// Plain GitHub URL.
let expected = Repository::new("github.com", "szabgab", "rust-digger");
let repo = Repository::from_url("https://github.com/szabgab/rust-digger").unwrap();
assert_eq!(repo, expected);
assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
assert_eq!(
repo.path(root).to_str(),
Some("/tmp/github.com/szabgab/rust-digger")
);
assert!(repo.is_github());
assert!(!repo.is_gitlab());
assert_eq!(repo.get_owner(), "szabgab");
// A trailing slash is ignored.
let repo = Repository::from_url("https://github.com/szabgab/rust-digger/").unwrap();
assert_eq!(repo, expected);
assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
assert!(repo.is_github());
// An http:// URL is accepted; `url()` still reports https://.
let repo = Repository::from_url("http://github.com/szabgab/rust-digger/").unwrap();
assert_eq!(repo, expected);
assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
assert!(repo.is_github());
// Anything after the repository segment is dropped.
let repo = Repository::from_url(
"https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
)
.unwrap();
assert_eq!(
repo,
Repository::new("github.com", "crypto-crawler", "crypto-crawler-rs",)
);
assert_eq!(
repo.url(),
"https://github.com/crypto-crawler/crypto-crawler-rs"
);
assert!(repo.is_github());
// gitlab.com
let repo = Repository::from_url("https://gitlab.com/szabgab/rust-digger").unwrap();
assert_eq!(
repo,
Repository::new("gitlab.com", "szabgab", "rust-digger")
);
assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
assert!(!repo.is_github());
assert!(repo.is_gitlab());
// Owner and repository name are lowercased while parsing.
let repo = Repository::from_url("https://gitlab.com/Szabgab/Rust-digger/").unwrap();
assert_eq!(
repo,
Repository::new("gitlab.com", "szabgab", "rust-digger")
);
assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
assert_eq!(repo.owner, "szabgab");
assert_eq!(repo.repo, "rust-digger");
assert_eq!(
repo.path(root).to_str(),
Some("/tmp/gitlab.com/szabgab/rust-digger")
);
// salsa.debian.org is treated as a GitLab host.
let repo = Repository::from_url("https://salsa.debian.org/szabgab/rust-digger/").unwrap();
assert_eq!(
repo,
Repository::new("salsa.debian.org", "szabgab", "rust-digger")
);
assert_eq!(repo.url(), "https://salsa.debian.org/szabgab/rust-digger");
assert_eq!(repo.owner, "szabgab");
assert_eq!(repo.repo, "rust-digger");
assert_eq!(
repo.path(root).to_str(),
Some("/tmp/salsa.debian.org/szabgab/rust-digger")
);
assert!(!repo.is_github());
assert!(repo.is_gitlab());
// An unsupported host yields an error carrying the offending URL.
let res = Repository::from_url("https://blabla.com/");
assert!(res.is_err());
assert_eq!(
res.unwrap_err().to_string(),
"No match for repo in 'https://blabla.com/'"
);
// bitbucket.org
let repo = Repository::from_url("https://bitbucket.org/szabgab/rust-digger/").unwrap();
assert_eq!(
repo,
Repository::new("bitbucket.org", "szabgab", "rust-digger")
);
// codeberg.org
let repo = Repository::from_url("https://codeberg.org/szabgab/rust-digger/").unwrap();
assert_eq!(
repo,
Repository::new("codeberg.org", "szabgab", "rust-digger")
);
}
// `check_url` reports an existing repository as reachable.
// This test performs a real HTTP request.
#[test]
fn test_check_good_url() {
    let reachable = Repository::from_url("https://github.com/szabgab/git-digger")
        .unwrap()
        .check_url();
    assert!(reachable);
}
// `check_url` reports a repository that does not exist as unreachable.
// This test performs a real HTTP request.
#[test]
fn test_check_missing_url() {
    let reachable = Repository::from_url("https://github.com/szabgab/no-such-repo")
        .unwrap()
        .check_url();
    assert!(!reachable);
}
// Updating a repository that does not exist still succeeds and creates the
// owner directory, but no repository directory appears below it.
#[test]
fn test_clone_missing_repo() {
    let temp_folder = tempfile::tempdir().unwrap();
    let root = temp_folder.path();
    let repo = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
    repo.update_repository(root, true, None).unwrap();
    let owner_path = root.join("github.com").join("szabgab");
    assert!(owner_path.exists());
    assert!(!owner_path.join("no-such-repo").exists());
}
// Updating an existing repository clones it into `<root>/<host>/<owner>/<repo>`.
// This test needs network access and a `git` executable.
#[test]
fn test_clone_this_repo() {
    let temp_folder = tempfile::tempdir().unwrap();
    let root = temp_folder.path();
    let repo = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
    repo.update_repository(root, true, None).unwrap();
    let owner_path = root.join("github.com").join("szabgab");
    assert!(owner_path.exists());
    assert!(owner_path.join("git-digger").exists());
}
}