1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Patterns describing the repository URLs this module recognises.
///
/// Every pattern captures, in order, the host, the owner and the repository
/// name. Anything following the repository segment (a trailing `/`, deeper
/// path components, ...) is accepted and ignored.
///
/// The dots in the host names are escaped so that only the literal host
/// matches; an unescaped `.` would accept any character in that position
/// (e.g. `githubxcom`).
const URL_REGEXES: [&str; 2] = [
    r"^https://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
];
14
/// Identifies a repository on a hosting service as a `host` / `owner` / `repo`
/// triple.
///
/// The type is a plain value: it can be cloned, compared for equality and used
/// as a key in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(dead_code)]
pub struct Repository {
    /// Host name of the hosting service, e.g. `github.com`.
    pub host: String,
    /// Account or organisation that owns the repository.
    pub owner: String,
    /// Name of the repository itself.
    pub repo: String,
}
22
23#[allow(dead_code)]
24impl Repository {
25 fn new(host: &str, owner: &str, repo: &str) -> Self {
26 Self {
27 host: host.to_string(),
28 owner: owner.to_string(),
29 repo: repo.to_string(),
30 }
31 }
32
33 pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
41 static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
42 URL_REGEXES
43 .iter()
44 .map(|reg| Regex::new(reg).unwrap())
45 .collect::<Vec<Regex>>()
46 });
47
48 for re in REGS.iter() {
49 if let Some(repo_url) = re.captures(url) {
50 let host = repo_url[1].to_lowercase();
51 let owner = repo_url[2].to_lowercase();
52 let repo = repo_url[3].to_lowercase();
53 return Ok(Self { host, owner, repo });
54 }
55 }
56 Err(format!("No match for repo in '{}'", &url).into())
57 }
58
59 pub fn url(&self) -> String {
60 format!("https://{}/{}/{}", self.host, self.owner, self.repo)
61 }
62
63 pub fn path(&self, root: &Path) -> PathBuf {
64 root.join(&self.host).join(&self.owner).join(&self.repo)
65 }
66}
67
68pub fn update_single_repository(
70 repos_folder: &Path,
71 host: &str,
72 owner: &str,
73 repo: &str,
74 repository_url: &str,
75 clone: bool,
76) -> Result<(), Box<dyn Error>> {
77 let owner_path = repos_folder.join(host).join(owner);
78 let current_dir = env::current_dir()?;
79 log::info!(
80 "Creating owner_path {:?} while current_dir is {:?}",
81 &owner_path,
82 ¤t_dir
83 );
84 fs::create_dir_all(&owner_path)?;
85 let repo_path = owner_path.join(repo);
86 if Path::new(&repo_path).exists() {
87 if clone {
88 log::info!("repo exist but we only clone now. Skipping.");
89 } else {
90 log::info!("repo exist; cd to {:?}", &repo_path);
91 env::set_current_dir(&repo_path)?;
92 git_pull();
93 }
94 } else {
95 log::info!("new repo; cd to {:?}", &owner_path);
96 env::set_current_dir(owner_path)?;
97 git_clone(repository_url, repo);
98 }
99 env::set_current_dir(current_dir)?;
100 Ok(())
101}
102
103fn git_pull() {
104 log::info!("git pull");
105 let current_dir = env::current_dir().unwrap();
106
107 match Command::new("git").arg("pull").output() {
108 Ok(result) => {
109 if result.status.success() {
110 log::info!(
111 "git_pull exit code: '{}' in folder {:?}",
112 result.status,
113 current_dir
114 );
115 } else {
116 log::warn!(
117 "git_pull exit code: '{}' in folder {:?}",
118 result.status,
119 current_dir
120 );
121 }
122 }
123 Err(err) => log::error!("Could not run git_pull in folder {current_dir:?} error: {err}"),
124 }
125}
126
127fn git_clone(url: &str, path: &str) {
128 log::info!("git clone {} {}", url, path);
129 match Command::new("git").arg("clone").arg(url).arg(path).output() {
130 Ok(result) => {
131 if result.status.success() {
132 log::info!("git_clone exit code: '{}'", result.status);
133 } else {
134 log::warn!(
135 "git_clone exit code: '{}' for url '{}' cloning to '{}'",
136 result.status,
137 url,
138 path
139 );
140 }
141 }
142 Err(err) => log::error!("Could not run git_clone {url} {path} error: {err}"),
143 }
144}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `Repository::from_url`, `url` and `path` on GitHub and GitLab
    /// URLs (plain, with trailing slash, with extra path segments, with mixed
    /// case) and checks the error reported for an unsupported URL.
    #[test]
    fn test_get_owner_and_repo() {
        // (input URL, expected host, expected owner, expected repo)
        let parse_cases = [
            (
                "https://github.com/szabgab/rust-digger",
                "github.com",
                "szabgab",
                "rust-digger",
            ),
            (
                "https://github.com/szabgab/rust-digger/",
                "github.com",
                "szabgab",
                "rust-digger",
            ),
            (
                "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
                "github.com",
                "crypto-crawler",
                "crypto-crawler-rs",
            ),
            (
                "https://gitlab.com/szabgab/rust-digger",
                "gitlab.com",
                "szabgab",
                "rust-digger",
            ),
            (
                "https://gitlab.com/Szabgab/Rust-digger/",
                "gitlab.com",
                "szabgab",
                "rust-digger",
            ),
        ];
        for &(input, host, owner, repo) in parse_cases.iter() {
            let parsed = Repository::from_url(input).unwrap();
            assert_eq!(parsed, Repository::new(host, owner, repo));
        }

        // (input URL, URL expected back from `url()`)
        let url_cases = [
            (
                "https://github.com/szabgab/rust-digger",
                "https://github.com/szabgab/rust-digger",
            ),
            (
                "https://github.com/szabgab/rust-digger/",
                "https://github.com/szabgab/rust-digger",
            ),
            (
                "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
                "https://github.com/crypto-crawler/crypto-crawler-rs",
            ),
            (
                "https://gitlab.com/szabgab/rust-digger",
                "https://gitlab.com/szabgab/rust-digger",
            ),
        ];
        for &(input, canonical) in url_cases.iter() {
            let parsed = Repository::from_url(input).unwrap();
            assert_eq!(parsed.url(), canonical);
        }

        // The on-disk location is derived from the root plus the three parts.
        let parsed = Repository::from_url("https://github.com/szabgab/rust-digger").unwrap();
        assert_eq!(
            parsed.path(Path::new("/tmp")).to_str(),
            Some("/tmp/github.com/szabgab/rust-digger")
        );

        // A URL matching none of the patterns is rejected with a clear message.
        let res = Repository::from_url("https://blabla.com/");
        assert!(res.is_err());
        assert_eq!(
            res.unwrap_err().to_string(),
            "No match for repo in 'https://blabla.com/'"
        );
    }
}