git_digger/
lib.rs

1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Regular expressions recognising repository URLs on the supported hosts.
///
/// Every pattern accepts both `http` and `https` and captures, in order:
///
/// 1. the host name,
/// 2. the owner (the first path segment),
/// 3. the repository name (the second path segment).
///
/// Anything following the repository name (a trailing slash, deeper paths)
/// is matched but not captured.
///
/// The dots of the host names are escaped so that only the literal host is
/// accepted; an unescaped `.` matches any character and would also accept
/// look-alike hosts such as `githubxcom`.
const URL_REGEXES: [&str; 3] = [
    r"^https?://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(salsa\.debian\.org)/([^/]+)/([^/]+)/?.*$",
];
15
/// Identifies a git repository by hosting site, owner and repository name.
///
/// `Eq`, `Hash` and `Clone` are derived in addition to `PartialEq` so that
/// values can be duplicated and used as keys in sets and maps.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(dead_code)]
pub struct Repository {
    /// Host name of the hosting site, e.g. `github.com`.
    host: String,
    /// First path segment of the repository URL (the account that owns it).
    owner: String,
    /// Second path segment of the repository URL (the repository name).
    repo: String,
}
23
24#[allow(dead_code)]
25impl Repository {
26    fn new(host: &str, owner: &str, repo: &str) -> Self {
27        Self {
28            host: host.to_string(),
29            owner: owner.to_string(),
30            repo: repo.to_string(),
31        }
32    }
33
34    /// Extracts the owner and repository name from a URL.
35    ///
36    /// Returns Repository
37    ///
38    /// Where host is either "github" or "gitlab" for now.
39    ///
40    /// e.g. https://github.com/szabgab/rust-digger -> ("github", "szabgab", "rust-digger")
41    pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
42        static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
43            URL_REGEXES
44                .iter()
45                .map(|reg| Regex::new(reg).unwrap())
46                .collect::<Vec<Regex>>()
47        });
48
49        for re in REGS.iter() {
50            if let Some(repo_url) = re.captures(url) {
51                let host = repo_url[1].to_lowercase();
52                let owner = repo_url[2].to_lowercase();
53                let repo = repo_url[3].to_lowercase();
54                return Ok(Self { host, owner, repo });
55            }
56        }
57        Err(format!("No match for repo in '{}'", &url).into())
58    }
59
60    pub fn url(&self) -> String {
61        format!("https://{}/{}/{}", self.host, self.owner, self.repo)
62    }
63
64    pub fn path(&self, root: &Path) -> PathBuf {
65        self.owner_path(root).join(&self.repo)
66    }
67
68    pub fn owner_path(&self, root: &Path) -> PathBuf {
69        root.join(&self.host).join(&self.owner)
70    }
71
72    //let _ = git2::Repository::clone(repo, temp_dir_str);
73    /// Run `git clone` or `git pull` to update a single repository
74    pub fn update_repository(&self, root: &Path, clone: bool) -> Result<(), Box<dyn Error>> {
75        let owner_path = self.owner_path(root);
76        let current_dir = env::current_dir()?;
77        log::info!(
78            "Creating owner_path {:?} while current_dir is {:?}",
79            &owner_path,
80            &current_dir
81        );
82        fs::create_dir_all(&owner_path)?;
83        let repo_path = self.path(root);
84        if Path::new(&repo_path).exists() {
85            if clone {
86                log::info!("repo exist but we only clone now.  Skipping.");
87            } else {
88                log::info!("repo exist; cd to {:?}", &repo_path);
89                env::set_current_dir(&repo_path)?;
90                self.git_pull();
91            }
92        } else {
93            log::info!("new repo; cd to {:?}", &owner_path);
94            env::set_current_dir(owner_path)?;
95            self.git_clone();
96        }
97        env::set_current_dir(current_dir)?;
98        Ok(())
99    }
100
101    fn git_pull(&self) {
102        let current_dir = env::current_dir().unwrap();
103        log::info!("git pull in {current_dir:?}");
104
105        match Command::new("git").arg("pull").output() {
106            Ok(result) => {
107                if result.status.success() {
108                    log::info!(
109                        "git_pull exit code: '{}' in folder {:?}",
110                        result.status,
111                        current_dir
112                    );
113                } else {
114                    log::warn!(
115                        "git_pull exit code: '{}' in folder {:?}",
116                        result.status,
117                        current_dir
118                    );
119                }
120            }
121            Err(err) => {
122                log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
123            }
124        }
125    }
126
127    fn git_clone(&self) {
128        let current_dir = env::current_dir().unwrap();
129        let url = self.url();
130        log::info!("git clone {url} in {current_dir:?}");
131        match Command::new("git").arg("clone").arg(self.url()).output() {
132            Ok(result) => {
133                if result.status.success() {
134                    log::info!("git_clone exit code: '{}'", result.status);
135                } else {
136                    log::warn!(
137                        "git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
138                        result.status,
139                        url,
140                    );
141                }
142            }
143            Err(err) => {
144                log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
145            }
146        }
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks URL parsing, URL reconstruction and the on-disk path layout.
    #[test]
    fn test_get_owner_and_repo() {
        let root = Path::new("/tmp");

        // (input URL, expected host, expected owner, expected repo, expected canonical URL)
        let cases = [
            // https, no trailing slash
            (
                "https://github.com/szabgab/rust-digger",
                "github.com",
                "szabgab",
                "rust-digger",
                "https://github.com/szabgab/rust-digger",
            ),
            // https, trailing slash
            (
                "https://github.com/szabgab/rust-digger/",
                "github.com",
                "szabgab",
                "rust-digger",
                "https://github.com/szabgab/rust-digger",
            ),
            // plain http, trailing slash; the canonical URL is still https
            (
                "http://github.com/szabgab/rust-digger/",
                "github.com",
                "szabgab",
                "rust-digger",
                "https://github.com/szabgab/rust-digger",
            ),
            // link pointing below the repository root
            (
                "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
                "github.com",
                "crypto-crawler",
                "crypto-crawler-rs",
                "https://github.com/crypto-crawler/crypto-crawler-rs",
            ),
            // gitlab.com
            (
                "https://gitlab.com/szabgab/rust-digger",
                "gitlab.com",
                "szabgab",
                "rust-digger",
                "https://gitlab.com/szabgab/rust-digger",
            ),
            // mixed-case owner and repository are lower-cased
            (
                "https://gitlab.com/Szabgab/Rust-digger/",
                "gitlab.com",
                "szabgab",
                "rust-digger",
                "https://gitlab.com/szabgab/rust-digger",
            ),
        ];

        for &(input, host, owner, name, canonical) in cases.iter() {
            let parsed = Repository::from_url(input).unwrap();
            assert_eq!(parsed, Repository::new(host, owner, name));
            assert_eq!(parsed.url(), canonical);
        }

        // Local path of a github.com repository.
        let github = Repository::from_url("https://github.com/szabgab/rust-digger").unwrap();
        assert_eq!(
            github.path(root).to_str(),
            Some("/tmp/github.com/szabgab/rust-digger")
        );

        // Individual fields and local path after lower-casing.
        let gitlab = Repository::from_url("https://gitlab.com/Szabgab/Rust-digger/").unwrap();
        assert_eq!(gitlab.owner, "szabgab");
        assert_eq!(gitlab.repo, "rust-digger");
        assert_eq!(
            gitlab.path(root).to_str(),
            Some("/tmp/gitlab.com/szabgab/rust-digger")
        );

        // A URL on an unsupported host is rejected with a descriptive message.
        let failure = Repository::from_url("https://blabla.com/");
        assert!(failure.is_err());
        assert_eq!(
            failure.unwrap_err().to_string(),
            "No match for repo in 'https://blabla.com/'"
        );
    }
}