git_digger/
lib.rs

1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
10#[derive(Debug, serde::Serialize, serde::Deserialize)]
11#[non_exhaustive]
12pub enum RepoPlatform {
13    GitHub,    // https://github.com/
14    GitLab,    // https://gitlab.com/
15    Gitea,     // https://about.gitea.com/
16    Cgit,      // https://git.zx2c4.com/cgit/about/
17    Forgejo,   // https://forgejo.org/
18    Fossil,    // https://fossil-scm.org/
19    Mercurial, // https://www.mercurial-scm.org/
20    Gogs,      // https://gogs.io/
21}
22
/// Patterns recognising repository URLs on the supported hosting services.
///
/// In every pattern capture group 1 is the host, group 2 the owner and group 3 the
/// repository name; anything after the repository name is accepted and ignored.
/// The dots of the host names are escaped so that they only match a literal `.`.
const URL_REGEXES: [&str; 5] = [
    r"^https?://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(salsa\.debian\.org)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(bitbucket\.org)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(codeberg\.org)/([^/]+)/([^/]+)(/.*)?$",
];
30
/// A repository on a hosting service, identified by host, owner and repository name.
///
/// Two repositories compare equal when all three components are equal, which also makes
/// the type usable as a key in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(dead_code)]
pub struct Repository {
    /// Host name of the hosting service, e.g. `github.com`.
    host: String,
    /// Account under which the repository lives, e.g. `szabgab`.
    owner: String,
    /// Name of the repository, e.g. `rust-digger`.
    repo: String,
}
38
39#[allow(dead_code)]
40impl Repository {
41    /// Represent a git repository in one of the git hosting providers
42    pub fn new(host: &str, owner: &str, repo: &str) -> Self {
43        Self {
44            host: host.to_string(),
45            owner: owner.to_string(),
46            repo: repo.to_string(),
47        }
48    }
49
50    /// Extracts the owner and repository name from a URL.
51    ///
52    /// Returns Repository
53    ///
54    /// Where host is either "github" or "gitlab" for now.
55    ///
56    /// e.g. https://github.com/szabgab/rust-digger -> ("github", "szabgab", "rust-digger")
57    pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
58        static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
59            URL_REGEXES
60                .iter()
61                .map(|reg| Regex::new(reg).unwrap())
62                .collect::<Vec<Regex>>()
63        });
64
65        for re in REGS.iter() {
66            if let Some(repo_url) = re.captures(url) {
67                let host = repo_url[1].to_lowercase();
68                let owner = repo_url[2].to_lowercase();
69                let repo = repo_url[3].to_lowercase();
70                return Ok(Self { host, owner, repo });
71            }
72        }
73        Err(format!("No match for repo in '{}'", &url).into())
74    }
75
76    pub fn url(&self) -> String {
77        format!("https://{}/{}/{}", self.host, self.owner, self.repo)
78    }
79
80    pub fn path(&self, root: &Path) -> PathBuf {
81        self.owner_path(root).join(&self.repo)
82    }
83
84    pub fn owner_path(&self, root: &Path) -> PathBuf {
85        root.join(&self.host).join(&self.owner)
86    }
87
88    pub fn get_owner(&self) -> &str {
89        &self.owner
90    }
91
92    pub fn is_github(&self) -> bool {
93        &self.host == "github.com"
94    }
95
96    pub fn is_gitlab(&self) -> bool {
97        ["gitlab.com", "salsa.debian.org"].contains(&self.host.as_str())
98    }
99
100    pub fn is_bitbucket(&self) -> bool {
101        &self.host == "bitbucket.org"
102    }
103
104    pub fn has_github_actions(&self, root: &Path) -> bool {
105        if !self.is_github() {
106            return false;
107        }
108
109        let path = self.path(root);
110        let dot_github = path.join(".github");
111        if !dot_github.exists() {
112            return false;
113        }
114
115        let workflow_dir = dot_github.join("workflows");
116        if !workflow_dir.exists() {
117            return false;
118        }
119
120        if let Ok(entries) = workflow_dir.read_dir() {
121            let yaml_count = entries
122                .filter_map(|entry| entry.ok())
123                .filter(|entry| {
124                    entry
125                        .path()
126                        .extension()
127                        .and_then(|ext| ext.to_str())
128                        .map(|ext| ext == "yml" || ext == "yaml")
129                        .unwrap_or(false)
130                })
131                .count();
132            if yaml_count > 0 {
133                return true;
134            }
135        }
136
137        false
138    }
139
140    pub fn has_dependabot(&self, root: &Path) -> bool {
141        if !self.is_github() {
142            return false;
143        }
144
145        let path = self.path(root);
146        let dot_github = path.join(".github");
147
148        if !dot_github.exists() {
149            return false;
150        }
151
152        let dependabot_file = dot_github.join("dependabot.yml");
153        dependabot_file.exists()
154    }
155
156    pub fn has_gitlab_pipeline(&self, root: &Path) -> bool {
157        if !self.is_gitlab() {
158            return false;
159        }
160
161        let path = self.path(root);
162        let ci_file = path.join(".gitlab-ci.yml");
163
164        ci_file.exists()
165    }
166
167    pub fn has_bitbucket_pipeline(&self, root: &Path) -> bool {
168        if !self.is_bitbucket() {
169            return false;
170        }
171
172        let path = self.path(root);
173        let ci_file = path.join("bitbucket-pipelines.yml");
174        ci_file.exists()
175    }
176
177    pub fn has_circle_ci(&self, root: &Path) -> bool {
178        if !self.is_github() {
179            return false;
180        }
181
182        let path = self.path(root);
183        let ci_folder = path.join(".circleci");
184
185        ci_folder.exists()
186    }
187
188    pub fn has_cirrus_ci(&self, root: &Path) -> bool {
189        if !self.is_github() {
190            return false;
191        }
192
193        let path = self.path(root);
194        let ci_folder = path.join(".cirrusci");
195
196        ci_folder.exists()
197    }
198
199    pub fn has_travis(&self, root: &Path) -> bool {
200        if !self.is_github() {
201            return false;
202        }
203
204        let path = self.path(root);
205        let ci_file = path.join(".travis.yaml");
206
207        ci_file.exists()
208    }
209
210    pub fn has_jenkins(&self, root: &Path) -> bool {
211        let path = self.path(root);
212        let ci_file = path.join("Jenkinsfile");
213
214        ci_file.exists()
215    }
216
217    pub fn has_appveyor(&self, root: &Path) -> bool {
218        let path = self.path(root);
219        let ci_file_1 = path.join("appveyor.yml");
220        let ci_file_2 = path.join(".appveyor.yml");
221
222        ci_file_1.exists() || ci_file_2.exists()
223    }
224
225    //let _ = git2::Repository::clone(repo, temp_dir_str);
226    /// Run `git clone` or `git pull` to update a single repository
227    pub fn update_repository(
228        &self,
229        root: &Path,
230        clone: bool,
231        depth: Option<usize>,
232    ) -> Result<(), Box<dyn Error>> {
233        let owner_path = self.owner_path(root);
234        let current_dir = env::current_dir()?;
235        log::info!(
236            "Creating owner_path {:?} while current_dir is {:?}",
237            &owner_path,
238            &current_dir
239        );
240        fs::create_dir_all(&owner_path)?;
241        let repo_path = self.path(root);
242        if Path::new(&repo_path).exists() {
243            if clone {
244                log::info!("repo exist but we only clone now.  Skipping.");
245            } else {
246                log::info!("repo exist; cd to {:?}", &repo_path);
247                env::set_current_dir(&repo_path)?;
248                self.git_pull();
249            }
250        } else {
251            log::info!("new repo; cd to {:?}", &owner_path);
252            env::set_current_dir(owner_path)?;
253            self.git_clone(depth);
254        }
255        env::set_current_dir(current_dir)?;
256        Ok(())
257    }
258
259    fn git_pull(&self) {
260        if !self.check_url() {
261            log::error!("Repository URL is not reachable: {}", self.url());
262            return;
263        }
264
265        let current_dir = env::current_dir().unwrap();
266        log::info!("git pull in {current_dir:?}");
267
268        match Command::new("git").arg("pull").output() {
269            Ok(result) => {
270                if result.status.success() {
271                    log::info!(
272                        "git_pull exit code: '{}' in folder {:?}",
273                        result.status,
274                        current_dir
275                    );
276                } else {
277                    log::warn!(
278                        "git_pull exit code: '{}' in folder {:?}",
279                        result.status,
280                        current_dir
281                    );
282                }
283            }
284            Err(err) => {
285                log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
286            }
287        }
288    }
289
290    fn git_clone(&self, depth: Option<usize>) {
291        if !self.check_url() {
292            log::error!("Repository URL is not reachable: {}", self.url());
293            return;
294        }
295
296        let current_dir = env::current_dir().unwrap();
297
298        let url = self.url();
299        log::info!("git clone {url} in {current_dir:?}");
300
301        let mut cmd = Command::new("git");
302        cmd.arg("clone");
303        if let Some(depth) = depth {
304            cmd.arg(format!("--depth={depth}"));
305        }
306        match cmd.arg(self.url()).output() {
307            Ok(result) => {
308                if result.status.success() {
309                    log::info!("git_clone exit code: '{}'", result.status);
310                } else {
311                    log::warn!(
312                        "git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
313                        result.status,
314                        url,
315                    );
316                }
317            }
318            Err(err) => {
319                log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
320            }
321        }
322    }
323
324    pub fn check_url(&self) -> bool {
325        let url = self.url();
326        let response = ureq::get(&url).call();
327        match response {
328            Ok(_) => true,
329            Err(err) => {
330                log::error!("Error checking URL '{}': {}", url, err);
331                false
332            }
333        }
334    }
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates the checkout directory of `repo` under `root` and returns it.
    fn make_checkout(root: &Path, repo: &Repository) -> PathBuf {
        let checkout = repo.path(root);
        fs::create_dir_all(&checkout).unwrap();
        checkout
    }

    #[test]
    fn test_get_owner_and_repo() {
        let root = Path::new("/tmp");
        let expected = Repository::new("github.com", "szabgab", "rust-digger");

        // test https github.com, no slash at the end
        let repo = Repository::from_url("https://github.com/szabgab/rust-digger").unwrap();
        assert_eq!(repo, expected);
        assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        assert_eq!(
            repo.path(root).to_str(),
            Some("/tmp/github.com/szabgab/rust-digger")
        );
        assert!(repo.is_github());
        assert!(!repo.is_gitlab());
        assert_eq!(repo.get_owner(), "szabgab");

        // test https github.com trailing slash
        let repo = Repository::from_url("https://github.com/szabgab/rust-digger/").unwrap();
        assert_eq!(repo, expected);
        assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        assert!(repo.is_github());

        // test http github.com trailing slash
        let repo = Repository::from_url("http://github.com/szabgab/rust-digger/").unwrap();
        assert_eq!(repo, expected);
        assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        assert!(repo.is_github());

        // test https github.com link to a folder inside the repository
        let repo = Repository::from_url(
            "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
        )
        .unwrap();
        assert_eq!(
            repo,
            Repository::new("github.com", "crypto-crawler", "crypto-crawler-rs")
        );
        assert_eq!(
            repo.url(),
            "https://github.com/crypto-crawler/crypto-crawler-rs"
        );
        assert!(repo.is_github());

        // test https gitlab.com
        let repo = Repository::from_url("https://gitlab.com/szabgab/rust-digger").unwrap();
        assert_eq!(
            repo,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
        assert!(!repo.is_github());
        assert!(repo.is_gitlab());

        // test converting to lowercase  gitlab.com
        let repo = Repository::from_url("https://gitlab.com/Szabgab/Rust-digger/").unwrap();
        assert_eq!(
            repo,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
        assert_eq!(repo.owner, "szabgab");
        assert_eq!(repo.repo, "rust-digger");
        assert_eq!(
            repo.path(root).to_str(),
            Some("/tmp/gitlab.com/szabgab/rust-digger")
        );

        // test salsa
        let repo = Repository::from_url("https://salsa.debian.org/szabgab/rust-digger/").unwrap();
        assert_eq!(
            repo,
            Repository::new("salsa.debian.org", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://salsa.debian.org/szabgab/rust-digger");
        assert_eq!(repo.owner, "szabgab");
        assert_eq!(repo.repo, "rust-digger");
        assert_eq!(
            repo.path(root).to_str(),
            Some("/tmp/salsa.debian.org/szabgab/rust-digger")
        );
        assert!(!repo.is_github());
        assert!(repo.is_gitlab());

        // test incorrect URL
        let res = Repository::from_url("https://blabla.com/");
        assert!(res.is_err());
        assert_eq!(
            res.unwrap_err().to_string(),
            "No match for repo in 'https://blabla.com/'"
        );

        // test bitbucket
        let repo = Repository::from_url("https://bitbucket.org/szabgab/rust-digger/").unwrap();
        assert_eq!(
            repo,
            Repository::new("bitbucket.org", "szabgab", "rust-digger")
        );
        assert!(repo.is_bitbucket());
        assert!(!repo.is_github());

        // test codeberg
        let repo = Repository::from_url("https://codeberg.org/szabgab/rust-digger/").unwrap();
        assert_eq!(
            repo,
            Repository::new("codeberg.org", "szabgab", "rust-digger")
        );
    }

    /// Offline: GitHub Actions needs a yml/yaml file in `.github/workflows` of a GitHub repo.
    #[test]
    fn test_github_actions_detection() {
        let temp_folder = tempfile::tempdir().unwrap();
        let root = temp_folder.path();

        let repo = Repository::new("github.com", "szabgab", "rust-digger");
        assert!(!repo.has_github_actions(root));

        let workflows = make_checkout(root, &repo).join(".github").join("workflows");
        fs::create_dir_all(&workflows).unwrap();
        assert!(!repo.has_github_actions(root));

        fs::write(workflows.join("README.md"), "").unwrap();
        assert!(!repo.has_github_actions(root));

        fs::write(workflows.join("ci.yml"), "").unwrap();
        assert!(repo.has_github_actions(root));

        // The same layout on another host does not count.
        let other = Repository::new("gitlab.com", "szabgab", "rust-digger");
        let workflows = make_checkout(root, &other)
            .join(".github")
            .join("workflows");
        fs::create_dir_all(&workflows).unwrap();
        fs::write(workflows.join("ci.yaml"), "").unwrap();
        assert!(!other.has_github_actions(root));
    }

    /// Offline: the GitLab and Bitbucket pipelines are tied to their own hosts.
    #[test]
    fn test_hosted_pipeline_detection() {
        let temp_folder = tempfile::tempdir().unwrap();
        let root = temp_folder.path();

        let gitlab = Repository::new("gitlab.com", "szabgab", "rust-digger");
        let checkout = make_checkout(root, &gitlab);
        assert!(!gitlab.has_gitlab_pipeline(root));
        fs::write(checkout.join(".gitlab-ci.yml"), "").unwrap();
        assert!(gitlab.has_gitlab_pipeline(root));
        assert!(!gitlab.has_bitbucket_pipeline(root));

        let bitbucket = Repository::new("bitbucket.org", "szabgab", "rust-digger");
        let checkout = make_checkout(root, &bitbucket);
        assert!(!bitbucket.has_bitbucket_pipeline(root));
        fs::write(checkout.join("bitbucket-pipelines.yml"), "").unwrap();
        assert!(bitbucket.has_bitbucket_pipeline(root));
        assert!(!bitbucket.has_gitlab_pipeline(root));
    }

    /// Offline: Jenkins and AppVeyor are detected on any host.
    #[test]
    fn test_host_independent_ci_detection() {
        let temp_folder = tempfile::tempdir().unwrap();
        let root = temp_folder.path();

        let repo = Repository::new("codeberg.org", "szabgab", "rust-digger");
        let checkout = make_checkout(root, &repo);
        assert!(!repo.has_jenkins(root));
        assert!(!repo.has_appveyor(root));

        fs::write(checkout.join("Jenkinsfile"), "").unwrap();
        assert!(repo.has_jenkins(root));

        fs::write(checkout.join(".appveyor.yml"), "").unwrap();
        assert!(repo.has_appveyor(root));
    }

    /// Needs network access.
    #[test]
    fn test_check_good_url() {
        let repo = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        assert!(repo.check_url());
    }

    /// Needs network access.
    #[test]
    fn test_check_missing_url() {
        let repo = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        assert!(!repo.check_url());
    }

    /// Needs network access and git.
    #[test]
    fn test_clone_missing_repo() {
        let temp_folder = tempfile::tempdir().unwrap();
        let repo = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        repo.update_repository(temp_folder.path(), true, None)
            .unwrap();
        let owner_path = temp_folder.path().join("github.com").join("szabgab");
        assert!(owner_path.exists());
        assert!(!owner_path.join("no-such-repo").exists());
    }

    /// Needs network access and git.
    #[test]
    fn test_clone_this_repo() {
        let temp_folder = tempfile::tempdir().unwrap();
        let repo = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        repo.update_repository(temp_folder.path(), true, None)
            .unwrap();
        let owner_path = temp_folder.path().join("github.com").join("szabgab");
        assert!(owner_path.exists());
        assert!(owner_path.join("git-digger").exists());
    }
}