git_digger/
lib.rs

1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Anchored patterns for the repository URLs that are recognised, one per
/// hosting service.
///
/// Capture groups: 1 = host name, 2 = owner, 3 = repository name. Anything
/// after the repository segment (a trailing slash or a deeper path) is matched
/// but not captured.
///
/// The dots in the host names are escaped so that they only match a literal
/// `.`; unescaped, `github.com` would also accept hosts such as `githubxcom`.
const URL_REGEXES: [&str; 2] = [
    r"^https?://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
];
14
/// A hosted git repository identified by its host, owner and name.
// dead_code is allowed because not every consumer of this type touches all of
// its fields.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
pub struct Repository {
    /// Host name of the hosting service, e.g. `github.com`.
    pub host: String,
    /// User or organisation that owns the repository.
    pub owner: String,
    /// Name of the repository.
    pub repo: String,
}
22
23#[allow(dead_code)]
24impl Repository {
25    fn new(host: &str, owner: &str, repo: &str) -> Self {
26        Self {
27            host: host.to_string(),
28            owner: owner.to_string(),
29            repo: repo.to_string(),
30        }
31    }
32
33    /// Extracts the owner and repository name from a URL.
34    ///
35    /// Returns Repository
36    ///
37    /// Where host is either "github" or "gitlab" for now.
38    ///
39    /// e.g. https://github.com/szabgab/rust-digger -> ("github", "szabgab", "rust-digger")
40    pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
41        static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
42            URL_REGEXES
43                .iter()
44                .map(|reg| Regex::new(reg).unwrap())
45                .collect::<Vec<Regex>>()
46        });
47
48        for re in REGS.iter() {
49            if let Some(repo_url) = re.captures(url) {
50                let host = repo_url[1].to_lowercase();
51                let owner = repo_url[2].to_lowercase();
52                let repo = repo_url[3].to_lowercase();
53                return Ok(Self { host, owner, repo });
54            }
55        }
56        Err(format!("No match for repo in '{}'", &url).into())
57    }
58
59    pub fn url(&self) -> String {
60        format!("https://{}/{}/{}", self.host, self.owner, self.repo)
61    }
62
63    pub fn path(&self, root: &Path) -> PathBuf {
64        self.owner_path(root).join(&self.repo)
65    }
66
67    pub fn owner_path(&self, root: &Path) -> PathBuf {
68        root.join(&self.host).join(&self.owner)
69    }
70
71    //let _ = git2::Repository::clone(repo, temp_dir_str);
72    /// Run `git clone` or `git pull` to update a single repository
73    pub fn update_repository(&self, root: &Path, clone: bool) -> Result<(), Box<dyn Error>> {
74        let owner_path = self.owner_path(root);
75        let current_dir = env::current_dir()?;
76        log::info!(
77            "Creating owner_path {:?} while current_dir is {:?}",
78            &owner_path,
79            &current_dir
80        );
81        fs::create_dir_all(&owner_path)?;
82        let repo_path = self.path(root);
83        if Path::new(&repo_path).exists() {
84            if clone {
85                log::info!("repo exist but we only clone now.  Skipping.");
86            } else {
87                log::info!("repo exist; cd to {:?}", &repo_path);
88                env::set_current_dir(&repo_path)?;
89                self.git_pull();
90            }
91        } else {
92            log::info!("new repo; cd to {:?}", &owner_path);
93            env::set_current_dir(owner_path)?;
94            self.git_clone();
95        }
96        env::set_current_dir(current_dir)?;
97        Ok(())
98    }
99
100    fn git_pull(&self) {
101        let current_dir = env::current_dir().unwrap();
102        log::info!("git pull in {current_dir:?}");
103
104        match Command::new("git").arg("pull").output() {
105            Ok(result) => {
106                if result.status.success() {
107                    log::info!(
108                        "git_pull exit code: '{}' in folder {:?}",
109                        result.status,
110                        current_dir
111                    );
112                } else {
113                    log::warn!(
114                        "git_pull exit code: '{}' in folder {:?}",
115                        result.status,
116                        current_dir
117                    );
118                }
119            }
120            Err(err) => {
121                log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
122            }
123        }
124    }
125
126    fn git_clone(&self) {
127        let current_dir = env::current_dir().unwrap();
128        let url = self.url();
129        log::info!("git clone {url} in {current_dir:?}");
130        match Command::new("git").arg("clone").arg(self.url()).output() {
131            Ok(result) => {
132                if result.status.success() {
133                    log::info!("git_clone exit code: '{}'", result.status);
134                } else {
135                    log::warn!(
136                        "git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
137                        result.status,
138                        url,
139                    );
140                }
141            }
142            Err(err) => {
143                log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
144            }
145        }
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `url`, failing the test with the offending URL if it is rejected.
    fn parse(url: &str) -> Repository {
        Repository::from_url(url).unwrap_or_else(|err| panic!("'{url}' should parse: {err}"))
    }

    /// Scheme, trailing slash, deeper paths and letter case must not affect
    /// the extracted host/owner/repo.
    #[test]
    fn test_get_owner_and_repo() {
        let expected = Repository::new("github.com", "szabgab", "rust-digger");
        for url in [
            "https://github.com/szabgab/rust-digger",
            "https://github.com/szabgab/rust-digger/",
            "http://github.com/szabgab/rust-digger/",
        ] {
            let repo = parse(url);
            assert_eq!(repo, expected, "parsing '{url}'");
            assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        }

        let repo =
            parse("https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type");
        assert_eq!(
            repo,
            Repository::new("github.com", "crypto-crawler", "crypto-crawler-rs")
        );
        assert_eq!(
            repo.url(),
            "https://github.com/crypto-crawler/crypto-crawler-rs"
        );

        let repo = parse("https://gitlab.com/szabgab/rust-digger");
        assert_eq!(
            repo,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");

        let repo = parse("https://gitlab.com/Szabgab/Rust-digger/");
        assert_eq!(
            repo,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
    }

    /// Local paths are nested as `<root>/<host>/<owner>/<repo>`.
    #[test]
    fn test_paths_are_nested_under_root() {
        let root = Path::new("/tmp");
        let repo = parse("https://github.com/szabgab/rust-digger");

        // Built with `join` rather than a string literal so the expectation
        // uses the platform's path separator.
        let owner_dir = root.join("github.com").join("szabgab");
        assert_eq!(repo.owner_path(root), owner_dir);
        assert_eq!(repo.path(root), owner_dir.join("rust-digger"));
    }

    /// URLs of unknown hosts are rejected with a descriptive message.
    #[test]
    fn test_from_url_no_match() {
        let res = Repository::from_url("https://blabla.com/");
        assert!(res.is_err());
        assert_eq!(
            res.unwrap_err().to_string(),
            "No match for repo in 'https://blabla.com/'"
        );
    }
}
207}