1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
10#[derive(Debug, serde::Serialize, serde::Deserialize)]
11#[non_exhaustive]
12pub enum RepoPlatform {
13 GitHub, GitLab, Gitea, Cgit, Forgejo, Fossil, Mercurial, Gogs, }
22
/// Patterns recognising repository URLs on the supported hosting sites.
///
/// Every pattern captures, in this order: (1) the host name, (2) the owner
/// and (3) the repository name. Whatever follows the repository name is
/// matched but not captured as part of the three values.
///
/// The dots of the host names are escaped so that only the literal host
/// matches; an unescaped `.` would also accept e.g. `githubXcom`.
const URL_REGEXES: [&str; 5] = [
    r"^https?://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(salsa\.debian\.org)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(bitbucket\.org)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(codeberg\.org)/([^/]+)/([^/]+)(/.*)?$",
];
30
/// A repository identified by the host it lives on, its owner and its name.
///
/// The three parts are combined into an `https://host/owner/repo` URL by
/// `Repository::url` and into a `root/host/owner/repo` directory by
/// `Repository::path`.
#[derive(Debug, PartialEq)]
// NOTE(review): presumably silences warnings in builds that do not use every
// field — confirm before removing.
#[allow(dead_code)]
pub struct Repository {
    /// Host name, e.g. `github.com`.
    host: String,
    /// Account (user or organisation) that owns the repository.
    owner: String,
    /// Name of the repository itself.
    repo: String,
}
38
39#[allow(dead_code)]
40impl Repository {
41 pub fn new(host: &str, owner: &str, repo: &str) -> Self {
43 Self {
44 host: host.to_string(),
45 owner: owner.to_string(),
46 repo: repo.to_string(),
47 }
48 }
49
50 pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
58 static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
59 URL_REGEXES
60 .iter()
61 .map(|reg| Regex::new(reg).unwrap())
62 .collect::<Vec<Regex>>()
63 });
64
65 for re in REGS.iter() {
66 if let Some(repo_url) = re.captures(url) {
67 let host = repo_url[1].to_lowercase();
68 let owner = repo_url[2].to_lowercase();
69 let repo = repo_url[3].to_lowercase();
70 return Ok(Self { host, owner, repo });
71 }
72 }
73 Err(format!("No match for repo in '{}'", &url).into())
74 }
75
76 pub fn url(&self) -> String {
77 format!("https://{}/{}/{}", self.host, self.owner, self.repo)
78 }
79
80 pub fn path(&self, root: &Path) -> PathBuf {
81 self.owner_path(root).join(&self.repo)
82 }
83
84 pub fn owner_path(&self, root: &Path) -> PathBuf {
85 root.join(&self.host).join(&self.owner)
86 }
87
88 pub fn get_owner(&self) -> &str {
89 &self.owner
90 }
91
92 pub fn is_github(&self) -> bool {
93 &self.host == "github.com"
94 }
95
96 pub fn is_gitlab(&self) -> bool {
97 ["gitlab.com", "salsa.debian.org"].contains(&self.host.as_str())
98 }
99
100 pub fn is_bitbucket(&self) -> bool {
101 &self.host == "bitbucket.org"
102 }
103
104 pub fn has_github_actions(&self, root: &Path) -> bool {
105 if !self.is_github() {
106 return false;
107 }
108
109 let path = self.path(root);
110 let dot_github = path.join(".github");
111 if !dot_github.exists() {
112 return false;
113 }
114
115 let workflow_dir = dot_github.join("workflows");
116 if !workflow_dir.exists() {
117 return false;
118 }
119
120 if let Ok(entries) = workflow_dir.read_dir() {
121 let yaml_count = entries
122 .filter_map(|entry| entry.ok())
123 .filter(|entry| {
124 entry
125 .path()
126 .extension()
127 .and_then(|ext| ext.to_str())
128 .map(|ext| ext == "yml" || ext == "yaml")
129 .unwrap_or(false)
130 })
131 .count();
132 if yaml_count > 0 {
133 return true;
134 }
135 }
136
137 false
138 }
139
140 pub fn has_dependabot(&self, root: &Path) -> bool {
141 if !self.is_github() {
142 return false;
143 }
144
145 let path = self.path(root);
146 let dot_github = path.join(".github");
147
148 if !dot_github.exists() {
149 return false;
150 }
151
152 let dependabot_file = dot_github.join("dependabot.yml");
153 dependabot_file.exists()
154 }
155
156 pub fn has_gitlab_pipeline(&self, root: &Path) -> bool {
157 if !self.is_gitlab() {
158 return false;
159 }
160
161 let path = self.path(root);
162 let ci_file = path.join(".gitlab-ci.yml");
163
164 ci_file.exists()
165 }
166
167 pub fn has_bitbucket_pipeline(&self, root: &Path) -> bool {
168 if !self.is_bitbucket() {
169 return false;
170 }
171
172 let path = self.path(root);
173 let ci_file = path.join("bitbucket-pipelines.yml");
174 ci_file.exists()
175 }
176
177 pub fn has_circle_ci(&self, root: &Path) -> bool {
178 if !self.is_github() {
179 return false;
180 }
181
182 let path = self.path(root);
183 let ci_folder = path.join(".circleci");
184
185 ci_folder.exists()
186 }
187
188 pub fn has_cirrus_ci(&self, root: &Path) -> bool {
189 if !self.is_github() {
190 return false;
191 }
192
193 let path = self.path(root);
194 let ci_folder = path.join(".cirrusci");
195
196 ci_folder.exists()
197 }
198
199 pub fn has_travis(&self, root: &Path) -> bool {
200 if !self.is_github() {
201 return false;
202 }
203
204 let path = self.path(root);
205 let ci_file = path.join(".travis.yaml");
206
207 ci_file.exists()
208 }
209
210 pub fn has_jenkins(&self, root: &Path) -> bool {
211 let path = self.path(root);
212 let ci_file = path.join("Jenkinsfile");
213
214 ci_file.exists()
215 }
216
217 pub fn has_appveyor(&self, root: &Path) -> bool {
218 let path = self.path(root);
219 let ci_file_1 = path.join("appveyor.yml");
220 let ci_file_2 = path.join(".appveyor.yml");
221
222 ci_file_1.exists() || ci_file_2.exists()
223 }
224
225 pub fn update_repository(
228 &self,
229 root: &Path,
230 clone: bool,
231 depth: Option<usize>,
232 ) -> Result<(), Box<dyn Error>> {
233 let owner_path = self.owner_path(root);
234 let current_dir = env::current_dir()?;
235 log::info!(
236 "Creating owner_path {:?} while current_dir is {:?}",
237 &owner_path,
238 ¤t_dir
239 );
240 fs::create_dir_all(&owner_path)?;
241 let repo_path = self.path(root);
242 if Path::new(&repo_path).exists() {
243 if clone {
244 log::info!("repo exist but we only clone now. Skipping.");
245 } else {
246 log::info!("repo exist; cd to {:?}", &repo_path);
247 env::set_current_dir(&repo_path)?;
248 self.git_pull();
249 }
250 } else {
251 log::info!("new repo; cd to {:?}", &owner_path);
252 env::set_current_dir(owner_path)?;
253 self.git_clone(depth);
254 }
255 env::set_current_dir(current_dir)?;
256 Ok(())
257 }
258
259 fn git_pull(&self) {
260 if !self.check_url() {
261 log::error!("Repository URL is not reachable: {}", self.url());
262 return;
263 }
264
265 let current_dir = env::current_dir().unwrap();
266 log::info!("git pull in {current_dir:?}");
267
268 match Command::new("git").arg("pull").output() {
269 Ok(result) => {
270 if result.status.success() {
271 log::info!(
272 "git_pull exit code: '{}' in folder {:?}",
273 result.status,
274 current_dir
275 );
276 } else {
277 log::warn!(
278 "git_pull exit code: '{}' in folder {:?}",
279 result.status,
280 current_dir
281 );
282 }
283 }
284 Err(err) => {
285 log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
286 }
287 }
288 }
289
290 fn git_clone(&self, depth: Option<usize>) {
291 if !self.check_url() {
292 log::error!("Repository URL is not reachable: {}", self.url());
293 return;
294 }
295
296 let current_dir = env::current_dir().unwrap();
297
298 let url = self.url();
299 log::info!("git clone {url} in {current_dir:?}");
300
301 let mut cmd = Command::new("git");
302 cmd.arg("clone");
303 if let Some(depth) = depth {
304 cmd.arg(format!("--depth={depth}"));
305 }
306 match cmd.arg(self.url()).output() {
307 Ok(result) => {
308 if result.status.success() {
309 log::info!("git_clone exit code: '{}'", result.status);
310 } else {
311 log::warn!(
312 "git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
313 result.status,
314 url,
315 );
316 }
317 }
318 Err(err) => {
319 log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
320 }
321 }
322 }
323
324 pub fn check_url(&self) -> bool {
325 let url = self.url();
326 let response = ureq::get(&url).call();
327 match response {
328 Ok(_) => true,
329 Err(err) => {
330 log::error!("Error checking URL '{}': {}", url, err);
331 false
332 }
333 }
334 }
335}
336
337#[cfg(test)]
338mod tests {
339 use super::*;
340
341 #[test]
342 fn test_get_owner_and_repo() {
343 let root = Path::new("/tmp");
344 let expected = Repository::new("github.com", "szabgab", "rust-digger");
345
346 let repo = Repository::from_url("https://github.com/szabgab/rust-digger").unwrap();
348 assert_eq!(repo, expected);
349 assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
350 assert_eq!(
351 repo.path(root).to_str(),
352 Some("/tmp/github.com/szabgab/rust-digger")
353 );
354 assert!(repo.is_github());
355 assert!(!repo.is_gitlab());
356 assert_eq!(repo.get_owner(), "szabgab");
357
358 let repo = Repository::from_url("https://github.com/szabgab/rust-digger/").unwrap();
360 assert_eq!(repo, expected);
361 assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
362 assert!(repo.is_github());
363
364 let repo = Repository::from_url("http://github.com/szabgab/rust-digger/").unwrap();
366 assert_eq!(repo, expected);
367 assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
368 assert!(repo.is_github());
369
370 let repo = Repository::from_url(
372 "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
373 )
374 .unwrap();
375 assert_eq!(
376 repo,
377 Repository::new("github.com", "crypto-crawler", "crypto-crawler-rs",)
378 );
379 assert_eq!(
380 repo.url(),
381 "https://github.com/crypto-crawler/crypto-crawler-rs"
382 );
383 assert!(repo.is_github());
384
385 let repo = Repository::from_url("https://gitlab.com/szabgab/rust-digger").unwrap();
387 assert_eq!(
388 repo,
389 Repository::new("gitlab.com", "szabgab", "rust-digger")
390 );
391 assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
392 assert!(!repo.is_github());
393 assert!(repo.is_gitlab());
394
395 let repo = Repository::from_url("https://gitlab.com/Szabgab/Rust-digger/").unwrap();
397 assert_eq!(
398 repo,
399 Repository::new("gitlab.com", "szabgab", "rust-digger")
400 );
401 assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
402 assert_eq!(repo.owner, "szabgab");
403 assert_eq!(repo.repo, "rust-digger");
404 assert_eq!(
405 repo.path(root).to_str(),
406 Some("/tmp/gitlab.com/szabgab/rust-digger")
407 );
408
409 let repo = Repository::from_url("https://salsa.debian.org/szabgab/rust-digger/").unwrap();
411 assert_eq!(
412 repo,
413 Repository::new("salsa.debian.org", "szabgab", "rust-digger")
414 );
415 assert_eq!(repo.url(), "https://salsa.debian.org/szabgab/rust-digger");
416 assert_eq!(repo.owner, "szabgab");
417 assert_eq!(repo.repo, "rust-digger");
418 assert_eq!(
419 repo.path(root).to_str(),
420 Some("/tmp/salsa.debian.org/szabgab/rust-digger")
421 );
422 assert!(!repo.is_github());
423 assert!(repo.is_gitlab());
424
425 let res = Repository::from_url("https://blabla.com/");
427 assert!(res.is_err());
428 assert_eq!(
429 res.unwrap_err().to_string(),
430 "No match for repo in 'https://blabla.com/'"
431 );
432
433 let repo = Repository::from_url("https://bitbucket.org/szabgab/rust-digger/").unwrap();
434 assert_eq!(
435 repo,
436 Repository::new("bitbucket.org", "szabgab", "rust-digger")
437 );
438
439 let repo = Repository::from_url("https://codeberg.org/szabgab/rust-digger/").unwrap();
440 assert_eq!(
441 repo,
442 Repository::new("codeberg.org", "szabgab", "rust-digger")
443 );
444 }
445
446 #[test]
447 fn test_check_good_url() {
448 let repo = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
449 assert!(repo.check_url());
450 }
451
452 #[test]
453 fn test_check_missing_url() {
454 let repo = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
455 assert!(!repo.check_url());
456 }
457
458 #[test]
459 fn test_clone_missing_repo() {
460 let temp_folder = tempfile::tempdir().unwrap();
461 let repo = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
462 repo.update_repository(Path::new(temp_folder.path()), true, None)
463 .unwrap();
464 let owner_path = temp_folder.path().join("github.com").join("szabgab");
465 assert!(owner_path.exists());
466 assert!(!owner_path.join("no-such-repo").exists());
467 }
468
469 #[test]
470 fn test_clone_this_repo() {
471 let temp_folder = tempfile::tempdir().unwrap();
472 let repo = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
473 repo.update_repository(Path::new(temp_folder.path()), true, None)
474 .unwrap();
475 let owner_path = temp_folder.path().join("github.com").join("szabgab");
476 assert!(owner_path.exists());
477 assert!(owner_path.join("git-digger").exists());
478 }
479}