git_digger/
lib.rs

1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Regular expressions recognising repository URLs of the supported hosts.
///
/// Each pattern has three capture groups: host name, owner and repository
/// name. Anything after the repository name (trailing slash, sub-path) is
/// accepted and ignored.
///
/// The dots in the host names are escaped so that they only match a literal
/// `.`; an unescaped `.` would accept look-alike hosts such as `githubXcom`.
const URL_REGEXES: [&str; 3] = [
    r"^https?://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(salsa\.debian\.org)/([^/]+)/([^/]+)/?.*$",
];
15
/// A git repository hosted by one of the supported git hosting providers.
///
/// The three fields together identify the repository: joined as
/// `https://{host}/{owner}/{repo}` they form its URL, and joined as path
/// components they form its location inside a local root directory.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
// NOTE(review): the reason for allowing dead code is not visible in this file;
// presumably the type is also compiled into targets that use only part of it.
#[allow(dead_code)]
pub struct Repository {
    /// Host name of the provider, e.g. `github.com`.
    host: String,
    /// Owner (user or organisation) of the repository.
    owner: String,
    /// Name of the repository.
    repo: String,
}
23
24#[allow(dead_code)]
25impl Repository {
26    /// Represent a git repository in one of the git hosting providers
27    pub fn new(host: &str, owner: &str, repo: &str) -> Self {
28        Self {
29            host: host.to_string(),
30            owner: owner.to_string(),
31            repo: repo.to_string(),
32        }
33    }
34
35    /// Extracts the owner and repository name from a URL.
36    ///
37    /// Returns Repository
38    ///
39    /// Where host is either "github" or "gitlab" for now.
40    ///
41    /// e.g. https://github.com/szabgab/rust-digger -> ("github", "szabgab", "rust-digger")
42    pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
43        static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
44            URL_REGEXES
45                .iter()
46                .map(|reg| Regex::new(reg).unwrap())
47                .collect::<Vec<Regex>>()
48        });
49
50        for re in REGS.iter() {
51            if let Some(repo_url) = re.captures(url) {
52                let host = repo_url[1].to_lowercase();
53                let owner = repo_url[2].to_lowercase();
54                let repo = repo_url[3].to_lowercase();
55                return Ok(Self { host, owner, repo });
56            }
57        }
58        Err(format!("No match for repo in '{}'", &url).into())
59    }
60
61    pub fn url(&self) -> String {
62        format!("https://{}/{}/{}", self.host, self.owner, self.repo)
63    }
64
65    pub fn path(&self, root: &Path) -> PathBuf {
66        self.owner_path(root).join(&self.repo)
67    }
68
69    pub fn owner_path(&self, root: &Path) -> PathBuf {
70        root.join(&self.host).join(&self.owner)
71    }
72
73    pub fn get_owner(&self) -> &str {
74        &self.owner
75    }
76
77    pub fn is_github(&self) -> bool {
78        &self.host == "github.com"
79    }
80
81    pub fn is_gitlab(&self) -> bool {
82        ["gitlab.com", "salsa.debian.org"].contains(&self.host.as_str())
83    }
84
85    pub fn has_github_actions(&self, root: &Path) -> bool {
86        if !self.is_github() {
87            return false;
88        }
89
90        let path = self.path(root);
91        let dot_github = path.join(".github");
92        if !dot_github.exists() {
93            return false;
94        }
95
96        let workflow_dir = dot_github.join("workflows");
97        if !workflow_dir.exists() {
98            return false;
99        }
100
101        if let Ok(entries) = workflow_dir.read_dir() {
102            let yaml_count = entries
103                .filter_map(|entry| entry.ok())
104                .filter(|entry| {
105                    entry
106                        .path()
107                        .extension()
108                        .and_then(|ext| ext.to_str())
109                        .map(|ext| ext == "yml" || ext == "yaml")
110                        .unwrap_or(false)
111                })
112                .count();
113            if yaml_count > 0 {
114                return true;
115            }
116        }
117
118        false
119    }
120
121    //let _ = git2::Repository::clone(repo, temp_dir_str);
122    /// Run `git clone` or `git pull` to update a single repository
123    pub fn update_repository(
124        &self,
125        root: &Path,
126        clone: bool,
127        depth: Option<usize>,
128    ) -> Result<(), Box<dyn Error>> {
129        let owner_path = self.owner_path(root);
130        let current_dir = env::current_dir()?;
131        log::info!(
132            "Creating owner_path {:?} while current_dir is {:?}",
133            &owner_path,
134            &current_dir
135        );
136        fs::create_dir_all(&owner_path)?;
137        let repo_path = self.path(root);
138        if Path::new(&repo_path).exists() {
139            if clone {
140                log::info!("repo exist but we only clone now.  Skipping.");
141            } else {
142                log::info!("repo exist; cd to {:?}", &repo_path);
143                env::set_current_dir(&repo_path)?;
144                self.git_pull();
145            }
146        } else {
147            log::info!("new repo; cd to {:?}", &owner_path);
148            env::set_current_dir(owner_path)?;
149            self.git_clone(depth);
150        }
151        env::set_current_dir(current_dir)?;
152        Ok(())
153    }
154
155    fn git_pull(&self) {
156        if !self.check_url() {
157            log::error!("Repository URL is not reachable: {}", self.url());
158            return;
159        }
160
161        let current_dir = env::current_dir().unwrap();
162        log::info!("git pull in {current_dir:?}");
163
164        match Command::new("git").arg("pull").output() {
165            Ok(result) => {
166                if result.status.success() {
167                    log::info!(
168                        "git_pull exit code: '{}' in folder {:?}",
169                        result.status,
170                        current_dir
171                    );
172                } else {
173                    log::warn!(
174                        "git_pull exit code: '{}' in folder {:?}",
175                        result.status,
176                        current_dir
177                    );
178                }
179            }
180            Err(err) => {
181                log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
182            }
183        }
184    }
185
186    fn git_clone(&self, depth: Option<usize>) {
187        if !self.check_url() {
188            log::error!("Repository URL is not reachable: {}", self.url());
189            return;
190        }
191
192        let current_dir = env::current_dir().unwrap();
193
194        let url = self.url();
195        log::info!("git clone {url} in {current_dir:?}");
196
197        let mut cmd = Command::new("git");
198        cmd.arg("clone");
199        if let Some(depth) = depth {
200            cmd.arg(format!("--depth={depth}"));
201        }
202        match cmd.arg(self.url()).output() {
203            Ok(result) => {
204                if result.status.success() {
205                    log::info!("git_clone exit code: '{}'", result.status);
206                } else {
207                    log::warn!(
208                        "git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
209                        result.status,
210                        url,
211                    );
212                }
213            }
214            Err(err) => {
215                log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
216            }
217        }
218    }
219
220    pub fn check_url(&self) -> bool {
221        let url = self.url();
222        let response = ureq::get(&url).call();
223        match response {
224            Ok(_) => true,
225            Err(err) => {
226                log::error!("Error checking URL '{}': {}", url, err);
227                false
228            }
229        }
230    }
231}
232
// Unit tests. The `test_check_*` and `test_clone_*` tests talk to github.com
// and the clone tests also run the `git` executable.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parsing of URLs and the values derived from the parsed parts.
    #[test]
    fn test_get_owner_and_repo() {
        let root = Path::new("/tmp");
        let expected = Repository::new("github.com", "szabgab", "rust-digger");

        // test https github.com, no slash at the end
        let repo = Repository::from_url("https://github.com/szabgab/rust-digger").unwrap();
        assert_eq!(repo, expected);
        assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        // Compare paths, not strings, so the separator of the platform does not matter.
        assert_eq!(
            repo.path(root),
            root.join("github.com").join("szabgab").join("rust-digger")
        );
        assert!(repo.is_github());
        assert!(!repo.is_gitlab());
        assert_eq!(repo.get_owner(), "szabgab");

        // test https github.com trailing slash
        let repo = Repository::from_url("https://github.com/szabgab/rust-digger/").unwrap();
        assert_eq!(repo, expected);
        assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        assert!(repo.is_github());

        // test http github.com trailing slash
        let repo = Repository::from_url("http://github.com/szabgab/rust-digger/").unwrap();
        assert_eq!(repo, expected);
        assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        assert!(repo.is_github());

        // test https github.com link to a sub-directory of the repository
        let repo = Repository::from_url(
            "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
        )
        .unwrap();
        assert_eq!(
            repo,
            Repository::new("github.com", "crypto-crawler", "crypto-crawler-rs")
        );
        assert_eq!(
            repo.url(),
            "https://github.com/crypto-crawler/crypto-crawler-rs"
        );
        assert!(repo.is_github());

        // test https gitlab.com
        let repo = Repository::from_url("https://gitlab.com/szabgab/rust-digger").unwrap();
        assert_eq!(
            repo,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
        assert!(!repo.is_github());
        assert!(repo.is_gitlab());

        // test converting to lowercase  gitlab.com
        let repo = Repository::from_url("https://gitlab.com/Szabgab/Rust-digger/").unwrap();
        assert_eq!(
            repo,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
        assert_eq!(repo.owner, "szabgab");
        assert_eq!(repo.repo, "rust-digger");
        assert_eq!(
            repo.path(root),
            root.join("gitlab.com").join("szabgab").join("rust-digger")
        );

        // test salsa
        let repo = Repository::from_url("https://salsa.debian.org/szabgab/rust-digger/").unwrap();
        assert_eq!(
            repo,
            Repository::new("salsa.debian.org", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://salsa.debian.org/szabgab/rust-digger");
        assert_eq!(repo.owner, "szabgab");
        assert_eq!(repo.repo, "rust-digger");
        assert_eq!(
            repo.path(root),
            root.join("salsa.debian.org")
                .join("szabgab")
                .join("rust-digger")
        );
        assert!(!repo.is_github());
        assert!(repo.is_gitlab());

        // test incorrect URL
        let res = Repository::from_url("https://blabla.com/");
        assert!(res.is_err());
        assert_eq!(
            res.unwrap_err().to_string(),
            "No match for repo in 'https://blabla.com/'"
        );
    }

    /// An existing repository is reported as reachable (needs network access).
    #[test]
    fn test_check_good_url() {
        let repo = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        assert!(repo.check_url());
    }

    /// A repository that does not exist is reported as not reachable (needs network access).
    #[test]
    fn test_check_missing_url() {
        let repo = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        assert!(!repo.check_url());
    }

    /// Cloning a missing repository creates the owner folder but no repository folder.
    #[test]
    fn test_clone_missing_repo() {
        let temp_folder = tempfile::tempdir().unwrap();
        let repo = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        repo.update_repository(temp_folder.path(), true, None)
            .unwrap();
        let owner_path = temp_folder.path().join("github.com").join("szabgab");
        assert!(owner_path.exists());
        assert!(!owner_path.join("no-such-repo").exists());
    }

    /// Cloning an existing repository creates both the owner and the repository folder.
    #[test]
    fn test_clone_this_repo() {
        let temp_folder = tempfile::tempdir().unwrap();
        let repo = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        repo.update_repository(temp_folder.path(), true, None)
            .unwrap();
        let owner_path = temp_folder.path().join("github.com").join("szabgab");
        assert!(owner_path.exists());
        assert!(owner_path.join("git-digger").exists());
    }
}
363}