git_digger/
lib.rs

1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Patterns recognizing repository URLs on the supported git hosting providers.
///
/// Every pattern has exactly three capture groups, in this order:
/// 1. the host name,
/// 2. the owner (user, organization or group),
/// 3. the repository name.
///
/// The dots in the host names are escaped so they only match a literal `.`
/// (an unescaped `.` would also accept e.g. `githubxcom`). The repository
/// segment stops at `/`, `?` or `#` so that a query string or fragment is not
/// captured as part of the repository name. Anything after the repository
/// segment is accepted and ignored.
const URL_REGEXES: [&str; 3] = [
    r"^https?://(github\.com)/([^/]+)/([^/?#]+)/?.*$",
    r"^https?://(gitlab\.com)/([^/]+)/([^/?#]+)/?.*$",
    r"^https?://(salsa\.debian\.org)/([^/]+)/([^/?#]+)/?.*$",
];
15
/// A git repository hosted on one of the supported git hosting providers.
///
/// The type is a plain value made of three strings, so it can be cloned,
/// compared and used as a key in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(dead_code)]
pub struct Repository {
    /// Full host name of the provider, e.g. `github.com`.
    host: String,
    /// Owner of the repository (the first path segment of its URL).
    owner: String,
    /// Name of the repository (the second path segment of its URL).
    repo: String,
}
23
24#[allow(dead_code)]
25impl Repository {
26    /// Represent a git repository in one of the git hosting providers
27    fn new(host: &str, owner: &str, repo: &str) -> Self {
28        Self {
29            host: host.to_string(),
30            owner: owner.to_string(),
31            repo: repo.to_string(),
32        }
33    }
34
35    /// Extracts the owner and repository name from a URL.
36    ///
37    /// Returns Repository
38    ///
39    /// Where host is either "github" or "gitlab" for now.
40    ///
41    /// e.g. https://github.com/szabgab/rust-digger -> ("github", "szabgab", "rust-digger")
42    pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
43        static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
44            URL_REGEXES
45                .iter()
46                .map(|reg| Regex::new(reg).unwrap())
47                .collect::<Vec<Regex>>()
48        });
49
50        for re in REGS.iter() {
51            if let Some(repo_url) = re.captures(url) {
52                let host = repo_url[1].to_lowercase();
53                let owner = repo_url[2].to_lowercase();
54                let repo = repo_url[3].to_lowercase();
55                return Ok(Self { host, owner, repo });
56            }
57        }
58        Err(format!("No match for repo in '{}'", &url).into())
59    }
60
61    pub fn url(&self) -> String {
62        format!("https://{}/{}/{}", self.host, self.owner, self.repo)
63    }
64
65    pub fn path(&self, root: &Path) -> PathBuf {
66        self.owner_path(root).join(&self.repo)
67    }
68
69    pub fn owner_path(&self, root: &Path) -> PathBuf {
70        root.join(&self.host).join(&self.owner)
71    }
72
73    pub fn get_owner(&self) -> &str {
74        &self.owner
75    }
76
77    pub fn is_github(&self) -> bool {
78        &self.host == "github.com"
79    }
80
81    pub fn is_gitlab(&self) -> bool {
82        ["gitlab.com", "salsa.debian.org"].contains(&self.host.as_str())
83    }
84
85    //let _ = git2::Repository::clone(repo, temp_dir_str);
86    /// Run `git clone` or `git pull` to update a single repository
87    pub fn update_repository(&self, root: &Path, clone: bool) -> Result<(), Box<dyn Error>> {
88        let owner_path = self.owner_path(root);
89        let current_dir = env::current_dir()?;
90        log::info!(
91            "Creating owner_path {:?} while current_dir is {:?}",
92            &owner_path,
93            &current_dir
94        );
95        fs::create_dir_all(&owner_path)?;
96        let repo_path = self.path(root);
97        if Path::new(&repo_path).exists() {
98            if clone {
99                log::info!("repo exist but we only clone now.  Skipping.");
100            } else {
101                log::info!("repo exist; cd to {:?}", &repo_path);
102                env::set_current_dir(&repo_path)?;
103                self.git_pull();
104            }
105        } else {
106            log::info!("new repo; cd to {:?}", &owner_path);
107            env::set_current_dir(owner_path)?;
108            self.git_clone();
109        }
110        env::set_current_dir(current_dir)?;
111        Ok(())
112    }
113
114    fn git_pull(&self) {
115        if !self.check_url() {
116            log::error!("Repository URL is not reachable: {}", self.url());
117            return;
118        }
119
120        let current_dir = env::current_dir().unwrap();
121        log::info!("git pull in {current_dir:?}");
122
123        match Command::new("git").arg("pull").output() {
124            Ok(result) => {
125                if result.status.success() {
126                    log::info!(
127                        "git_pull exit code: '{}' in folder {:?}",
128                        result.status,
129                        current_dir
130                    );
131                } else {
132                    log::warn!(
133                        "git_pull exit code: '{}' in folder {:?}",
134                        result.status,
135                        current_dir
136                    );
137                }
138            }
139            Err(err) => {
140                log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
141            }
142        }
143    }
144
145    fn git_clone(&self) {
146        if !self.check_url() {
147            log::error!("Repository URL is not reachable: {}", self.url());
148            return;
149        }
150
151        let current_dir = env::current_dir().unwrap();
152
153        let url = self.url();
154        log::info!("git clone {url} in {current_dir:?}");
155
156        match Command::new("git").arg("clone").arg(self.url()).output() {
157            Ok(result) => {
158                if result.status.success() {
159                    log::info!("git_clone exit code: '{}'", result.status);
160                } else {
161                    log::warn!(
162                        "git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
163                        result.status,
164                        url,
165                    );
166                }
167            }
168            Err(err) => {
169                log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
170            }
171        }
172    }
173
174    pub fn check_url(&self) -> bool {
175        let url = self.url();
176        let response = ureq::get(&url).call();
177        match response {
178            Ok(_) => true,
179            Err(err) => {
180                log::error!("Error checking URL '{}': {}", url, err);
181                false
182            }
183        }
184    }
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// Parsing of supported and unsupported URLs, and the values derived
    /// from the parsed parts (URL, local path, host predicates, owner).
    #[test]
    fn test_get_owner_and_repo() {
        let root = Path::new("/tmp");
        let parse = |url: &str| Repository::from_url(url).unwrap();
        let rust_digger_on = |host: &str| Repository::new(host, "szabgab", "rust-digger");
        let on_github = rust_digger_on("github.com");

        // https github.com, no slash at the end
        let parsed = parse("https://github.com/szabgab/rust-digger");
        assert_eq!(parsed, on_github);
        assert_eq!(parsed.url(), "https://github.com/szabgab/rust-digger");
        assert_eq!(
            parsed.path(root).to_str(),
            Some("/tmp/github.com/szabgab/rust-digger")
        );
        assert!(parsed.is_github());
        assert!(!parsed.is_gitlab());
        assert_eq!(parsed.get_owner(), "szabgab");

        // github.com with a trailing slash, first over https then over http
        for with_slash in [
            "https://github.com/szabgab/rust-digger/",
            "http://github.com/szabgab/rust-digger/",
        ] {
            let parsed = parse(with_slash);
            assert_eq!(parsed, on_github);
            assert_eq!(parsed.url(), "https://github.com/szabgab/rust-digger");
            assert!(parsed.is_github());
        }

        // https github.com link pointing below the repository root
        let parsed = parse(
            "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
        );
        assert_eq!(
            parsed,
            Repository::new("github.com", "crypto-crawler", "crypto-crawler-rs")
        );
        assert_eq!(
            parsed.url(),
            "https://github.com/crypto-crawler/crypto-crawler-rs"
        );
        assert!(parsed.is_github());

        // https gitlab.com
        let parsed = parse("https://gitlab.com/szabgab/rust-digger");
        assert_eq!(parsed, rust_digger_on("gitlab.com"));
        assert_eq!(parsed.url(), "https://gitlab.com/szabgab/rust-digger");
        assert!(!parsed.is_github());
        assert!(parsed.is_gitlab());

        // owner and repository are converted to lowercase (gitlab.com)
        let parsed = parse("https://gitlab.com/Szabgab/Rust-digger/");
        assert_eq!(parsed, rust_digger_on("gitlab.com"));
        assert_eq!(parsed.url(), "https://gitlab.com/szabgab/rust-digger");
        assert_eq!(parsed.owner, "szabgab");
        assert_eq!(parsed.repo, "rust-digger");
        assert_eq!(
            parsed.path(root).to_str(),
            Some("/tmp/gitlab.com/szabgab/rust-digger")
        );

        // salsa.debian.org is treated as a gitlab host
        let parsed = parse("https://salsa.debian.org/szabgab/rust-digger/");
        assert_eq!(parsed, rust_digger_on("salsa.debian.org"));
        assert_eq!(parsed.url(), "https://salsa.debian.org/szabgab/rust-digger");
        assert_eq!(parsed.owner, "szabgab");
        assert_eq!(parsed.repo, "rust-digger");
        assert_eq!(
            parsed.path(root).to_str(),
            Some("/tmp/salsa.debian.org/szabgab/rust-digger")
        );
        assert!(!parsed.is_github());
        assert!(parsed.is_gitlab());

        // a URL on an unsupported host is rejected
        let rejected = Repository::from_url("https://blabla.com/");
        assert!(rejected.is_err());
        assert_eq!(
            rejected.unwrap_err().to_string(),
            "No match for repo in 'https://blabla.com/'"
        );
    }

    /// `check_url` issues a real HTTP request, so this test needs network access.
    #[test]
    fn test_check_good_url() {
        let existing = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        assert!(existing.check_url());
    }

    /// `check_url` issues a real HTTP request, so this test needs network access.
    #[test]
    fn test_check_missing_url() {
        let missing = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        assert!(!missing.check_url());
    }

    /// Updating a repository that does not exist still succeeds and creates
    /// the owner folder, but leaves no repository folder behind.
    #[test]
    fn test_clone_missing_repo() {
        let sandbox = tempfile::tempdir().unwrap();
        let missing = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        missing.update_repository(sandbox.path(), true).unwrap();

        let owner_dir = sandbox.path().join("github.com").join("szabgab");
        assert!(owner_dir.exists());
        assert!(!owner_dir.join("no-such-repo").exists());
    }

    /// Updating an existing repository creates both the owner folder and the
    /// repository folder.
    #[test]
    fn test_clone_this_repo() {
        let sandbox = tempfile::tempdir().unwrap();
        let existing = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        existing.update_repository(sandbox.path(), true).unwrap();

        let owner_dir = sandbox.path().join("github.com").join("szabgab");
        assert!(owner_dir.exists());
        assert!(owner_dir.join("git-digger").exists());
    }
}