git_digger/
lib.rs

1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Anchored regular expressions describing the repository URLs that are recognised.
///
/// Each pattern defines exactly three capture groups, in this order:
/// host, owner, repository name. Whatever follows the repository name
/// (a trailing slash or a sub-path) is matched but not captured.
///
/// The dots of the host names are escaped on purpose: an unescaped `.` matches
/// any character, which would let a look-alike host such as `githubxcom` through.
const URL_REGEXES: [&str; 2] = [
    r"^https://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
];
14
/// A source-code repository identified by its hosting site, its owner and its name.
///
/// `Clone`, `Eq` and `Hash` are derived in addition to `Debug` and `PartialEq`
/// so that values can be duplicated and used as keys of hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
// NOTE(review): presumably kept because not every consumer of this crate uses
// every item — confirm whether the lint suppression is still needed.
#[allow(dead_code)]
pub struct Repository {
    /// Domain name of the hosting site, e.g. `github.com`.
    pub host: String,
    /// Account or organisation that owns the repository.
    pub owner: String,
    /// Name of the repository itself.
    pub repo: String,
}
22
23#[allow(dead_code)]
24impl Repository {
25    fn new(host: &str, owner: &str, repo: &str) -> Self {
26        Self {
27            host: host.to_string(),
28            owner: owner.to_string(),
29            repo: repo.to_string(),
30        }
31    }
32
33    /// Extracts the owner and repository name from a URL.
34    ///
35    /// Returns Repository
36    ///
37    /// Where host is either "github" or "gitlab" for now.
38    ///
39    /// e.g. https://github.com/szabgab/rust-digger -> ("github", "szabgab", "rust-digger")
40    pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
41        static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
42            URL_REGEXES
43                .iter()
44                .map(|reg| Regex::new(reg).unwrap())
45                .collect::<Vec<Regex>>()
46        });
47
48        for re in REGS.iter() {
49            if let Some(repo_url) = re.captures(url) {
50                let host = repo_url[1].to_lowercase();
51                let owner = repo_url[2].to_lowercase();
52                let repo = repo_url[3].to_lowercase();
53                return Ok(Self { host, owner, repo });
54            }
55        }
56        Err(format!("No match for repo in '{}'", &url).into())
57    }
58
59    pub fn url(&self) -> String {
60        format!("https://{}/{}/{}", self.host, self.owner, self.repo)
61    }
62
63    pub fn path(&self, root: &Path) -> PathBuf {
64        root.join(&self.host).join(&self.owner).join(&self.repo)
65    }
66}
67
68/// Run `git clone` or `git pull` to update a single repository
69pub fn update_single_repository(
70    repos_folder: &Path,
71    host: &str,
72    owner: &str,
73    repo: &str,
74    repository_url: &str,
75    clone: bool,
76) -> Result<(), Box<dyn Error>> {
77    let owner_path = repos_folder.join(host).join(owner);
78    let current_dir = env::current_dir()?;
79    log::info!(
80        "Creating owner_path {:?} while current_dir is {:?}",
81        &owner_path,
82        &current_dir
83    );
84    fs::create_dir_all(&owner_path)?;
85    let repo_path = owner_path.join(repo);
86    if Path::new(&repo_path).exists() {
87        if clone {
88            log::info!("repo exist but we only clone now.  Skipping.");
89        } else {
90            log::info!("repo exist; cd to {:?}", &repo_path);
91            env::set_current_dir(&repo_path)?;
92            git_pull();
93        }
94    } else {
95        log::info!("new repo; cd to {:?}", &owner_path);
96        env::set_current_dir(owner_path)?;
97        git_clone(repository_url, repo);
98    }
99    env::set_current_dir(current_dir)?;
100    Ok(())
101}
102
103fn git_pull() {
104    log::info!("git pull");
105    let current_dir = env::current_dir().unwrap();
106
107    match Command::new("git").arg("pull").output() {
108        Ok(result) => {
109            if result.status.success() {
110                log::info!(
111                    "git_pull exit code: '{}' in folder {:?}",
112                    result.status,
113                    current_dir
114                );
115            } else {
116                log::warn!(
117                    "git_pull exit code: '{}' in folder {:?}",
118                    result.status,
119                    current_dir
120                );
121            }
122        }
123        Err(err) => log::error!("Could not run git_pull in folder {current_dir:?} error: {err}"),
124    }
125}
126
127fn git_clone(url: &str, path: &str) {
128    log::info!("git clone {} {}", url, path);
129    match Command::new("git").arg("clone").arg(url).arg(path).output() {
130        Ok(result) => {
131            if result.status.success() {
132                log::info!("git_clone exit code: '{}'", result.status);
133            } else {
134                log::warn!(
135                    "git_clone exit code: '{}' for url '{}' cloning to '{}'",
136                    result.status,
137                    url,
138                    path
139                );
140            }
141        }
142        Err(err) => log::error!("Could not run git_clone {url} {path} error: {err}"),
143    }
144}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks URL parsing, URL reconstruction, path building and the error
    /// reported for an unsupported URL.
    #[test]
    fn test_get_owner_and_repo() {
        let parse = |url: &str| Repository::from_url(url).unwrap();

        // (input, expected host, expected owner, expected repo,
        //  expected result of `url()` or `None` when it is not checked)
        let cases = [
            (
                "https://github.com/szabgab/rust-digger",
                "github.com",
                "szabgab",
                "rust-digger",
                Some("https://github.com/szabgab/rust-digger"),
            ),
            // A trailing slash is ignored.
            (
                "https://github.com/szabgab/rust-digger/",
                "github.com",
                "szabgab",
                "rust-digger",
                Some("https://github.com/szabgab/rust-digger"),
            ),
            // A sub-path after the repository name is ignored.
            (
                "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
                "github.com",
                "crypto-crawler",
                "crypto-crawler-rs",
                Some("https://github.com/crypto-crawler/crypto-crawler-rs"),
            ),
            (
                "https://gitlab.com/szabgab/rust-digger",
                "gitlab.com",
                "szabgab",
                "rust-digger",
                Some("https://gitlab.com/szabgab/rust-digger"),
            ),
            // Owner and repository name are lower-cased.
            (
                "https://gitlab.com/Szabgab/Rust-digger/",
                "gitlab.com",
                "szabgab",
                "rust-digger",
                None,
            ),
        ];

        for (input, host, owner, name, canonical) in cases {
            let parsed = parse(input);
            assert_eq!(parsed, Repository::new(host, owner, name));
            if let Some(canonical) = canonical {
                assert_eq!(parsed.url(), canonical);
            }
        }

        // Location of the local copy below a root folder.
        let root = Path::new("/tmp");
        let located = parse("https://github.com/szabgab/rust-digger");
        assert_eq!(
            located.path(root).to_str(),
            Some("/tmp/github.com/szabgab/rust-digger")
        );

        // A URL matching none of the patterns is rejected with a descriptive message.
        let failure = Repository::from_url("https://blabla.com/");
        assert!(failure.is_err());
        assert_eq!(
            failure.unwrap_err().to_string(),
            "No match for repo in 'https://blabla.com/'"
        );
    }
}