1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Patterns for the repository URLs this module understands.
///
/// Each pattern captures, in order: the host, the owner and the repository
/// name. Anything after the repository name (a trailing slash, a sub-path)
/// is accepted and ignored.
///
/// The dots of the host names are escaped so that they only match a literal
/// `.`; unescaped, `github.com` would also match e.g. `githubxcom`.
const URL_REGEXES: [&str; 3] = [
    r"^https?://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(salsa\.debian\.org)/([^/]+)/([^/]+)/?.*$",
];
15
/// A repository on a hosting site, identified by host, owner and name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(dead_code)]
pub struct Repository {
    /// Host name of the hosting site, e.g. `github.com`.
    host: String,
    /// User or organisation the repository belongs to.
    owner: String,
    /// Name of the repository itself.
    repo: String,
}
23
24#[allow(dead_code)]
25impl Repository {
26 fn new(host: &str, owner: &str, repo: &str) -> Self {
28 Self {
29 host: host.to_string(),
30 owner: owner.to_string(),
31 repo: repo.to_string(),
32 }
33 }
34
35 pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
43 static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
44 URL_REGEXES
45 .iter()
46 .map(|reg| Regex::new(reg).unwrap())
47 .collect::<Vec<Regex>>()
48 });
49
50 for re in REGS.iter() {
51 if let Some(repo_url) = re.captures(url) {
52 let host = repo_url[1].to_lowercase();
53 let owner = repo_url[2].to_lowercase();
54 let repo = repo_url[3].to_lowercase();
55 return Ok(Self { host, owner, repo });
56 }
57 }
58 Err(format!("No match for repo in '{}'", &url).into())
59 }
60
61 pub fn url(&self) -> String {
62 format!("https://{}/{}/{}", self.host, self.owner, self.repo)
63 }
64
65 pub fn path(&self, root: &Path) -> PathBuf {
66 self.owner_path(root).join(&self.repo)
67 }
68
69 pub fn owner_path(&self, root: &Path) -> PathBuf {
70 root.join(&self.host).join(&self.owner)
71 }
72
73 pub fn get_owner(&self) -> &str {
74 &self.owner
75 }
76
77 pub fn is_github(&self) -> bool {
78 &self.host == "github.com"
79 }
80
81 pub fn is_gitlab(&self) -> bool {
82 ["gitlab.com", "salsa.debian.org"].contains(&self.host.as_str())
83 }
84
85 pub fn update_repository(&self, root: &Path, clone: bool) -> Result<(), Box<dyn Error>> {
88 let owner_path = self.owner_path(root);
89 let current_dir = env::current_dir()?;
90 log::info!(
91 "Creating owner_path {:?} while current_dir is {:?}",
92 &owner_path,
93 ¤t_dir
94 );
95 fs::create_dir_all(&owner_path)?;
96 let repo_path = self.path(root);
97 if Path::new(&repo_path).exists() {
98 if clone {
99 log::info!("repo exist but we only clone now. Skipping.");
100 } else {
101 log::info!("repo exist; cd to {:?}", &repo_path);
102 env::set_current_dir(&repo_path)?;
103 self.git_pull();
104 }
105 } else {
106 log::info!("new repo; cd to {:?}", &owner_path);
107 env::set_current_dir(owner_path)?;
108 self.git_clone();
109 }
110 env::set_current_dir(current_dir)?;
111 Ok(())
112 }
113
114 fn git_pull(&self) {
115 if !self.check_url() {
116 log::error!("Repository URL is not reachable: {}", self.url());
117 return;
118 }
119
120 let current_dir = env::current_dir().unwrap();
121 log::info!("git pull in {current_dir:?}");
122
123 match Command::new("git").arg("pull").output() {
124 Ok(result) => {
125 if result.status.success() {
126 log::info!(
127 "git_pull exit code: '{}' in folder {:?}",
128 result.status,
129 current_dir
130 );
131 } else {
132 log::warn!(
133 "git_pull exit code: '{}' in folder {:?}",
134 result.status,
135 current_dir
136 );
137 }
138 }
139 Err(err) => {
140 log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
141 }
142 }
143 }
144
145 fn git_clone(&self) {
146 if !self.check_url() {
147 log::error!("Repository URL is not reachable: {}", self.url());
148 return;
149 }
150
151 let current_dir = env::current_dir().unwrap();
152
153 let url = self.url();
154 log::info!("git clone {url} in {current_dir:?}");
155
156 match Command::new("git").arg("clone").arg(self.url()).output() {
157 Ok(result) => {
158 if result.status.success() {
159 log::info!("git_clone exit code: '{}'", result.status);
160 } else {
161 log::warn!(
162 "git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
163 result.status,
164 url,
165 );
166 }
167 }
168 Err(err) => {
169 log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
170 }
171 }
172 }
173
174 pub fn check_url(&self) -> bool {
175 let url = self.url();
176 let response = ureq::get(&url).call();
177 match response {
178 Ok(_) => true,
179 Err(err) => {
180 log::error!("Error checking URL '{}': {}", url, err);
181 false
182 }
183 }
184 }
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// URL parsing and the accessors derived from the parsed parts.
    #[test]
    fn test_get_owner_and_repo() {
        let root = Path::new("/tmp");
        let expected = Repository::new("github.com", "szabgab", "rust-digger");

        let repo = Repository::from_url("https://github.com/szabgab/rust-digger").unwrap();
        assert_eq!(repo, expected);
        assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        assert_eq!(
            repo.path(root).to_str(),
            Some("/tmp/github.com/szabgab/rust-digger")
        );
        assert!(repo.is_github());
        assert!(!repo.is_gitlab());
        assert_eq!(repo.get_owner(), "szabgab");

        // A trailing slash is ignored.
        let repo = Repository::from_url("https://github.com/szabgab/rust-digger/").unwrap();
        assert_eq!(repo, expected);
        assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        assert!(repo.is_github());

        // `http` is accepted, but `url()` always reports `https`.
        let repo = Repository::from_url("http://github.com/szabgab/rust-digger/").unwrap();
        assert_eq!(repo, expected);
        assert_eq!(repo.url(), "https://github.com/szabgab/rust-digger");
        assert!(repo.is_github());

        // Anything after the repository name is dropped.
        let repo = Repository::from_url(
            "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
        )
        .unwrap();
        assert_eq!(
            repo,
            Repository::new("github.com", "crypto-crawler", "crypto-crawler-rs")
        );
        assert_eq!(
            repo.url(),
            "https://github.com/crypto-crawler/crypto-crawler-rs"
        );
        assert!(repo.is_github());

        let repo = Repository::from_url("https://gitlab.com/szabgab/rust-digger").unwrap();
        assert_eq!(
            repo,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
        assert!(!repo.is_github());
        assert!(repo.is_gitlab());

        // Owner and repository name are lowercased.
        let repo = Repository::from_url("https://gitlab.com/Szabgab/Rust-digger/").unwrap();
        assert_eq!(
            repo,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://gitlab.com/szabgab/rust-digger");
        assert_eq!(repo.owner, "szabgab");
        assert_eq!(repo.repo, "rust-digger");
        assert_eq!(
            repo.path(root).to_str(),
            Some("/tmp/gitlab.com/szabgab/rust-digger")
        );

        let repo = Repository::from_url("https://salsa.debian.org/szabgab/rust-digger/").unwrap();
        assert_eq!(
            repo,
            Repository::new("salsa.debian.org", "szabgab", "rust-digger")
        );
        assert_eq!(repo.url(), "https://salsa.debian.org/szabgab/rust-digger");
        assert_eq!(repo.owner, "szabgab");
        assert_eq!(repo.repo, "rust-digger");
        assert_eq!(
            repo.path(root).to_str(),
            Some("/tmp/salsa.debian.org/szabgab/rust-digger")
        );
        assert!(!repo.is_github());
        assert!(repo.is_gitlab());

        // Unsupported hosts are rejected with a descriptive message.
        let res = Repository::from_url("https://blabla.com/");
        assert!(res.is_err());
        assert_eq!(
            res.unwrap_err().to_string(),
            "No match for repo in 'https://blabla.com/'"
        );
    }

    /// Needs network access: `check_url` sends a real HTTP request.
    #[test]
    fn test_check_good_url() {
        let repo = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        assert!(repo.check_url());
    }

    /// Needs network access: `check_url` sends a real HTTP request.
    #[test]
    fn test_check_missing_url() {
        let repo = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        assert!(!repo.check_url());
    }

    /// Needs network access. A repository that cannot be reached still leaves
    /// the owner directory behind, and `update_repository` reports success.
    #[test]
    fn test_clone_missing_repo() {
        let temp_folder = tempfile::tempdir().unwrap();
        let repo = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        repo.update_repository(temp_folder.path(), true).unwrap();
        let owner_path = temp_folder.path().join("github.com").join("szabgab");
        assert!(owner_path.exists());
        assert!(!owner_path.join("no-such-repo").exists());
    }

    /// Needs network access and an installed `git`.
    #[test]
    fn test_clone_this_repo() {
        let temp_folder = tempfile::tempdir().unwrap();
        let repo = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        repo.update_repository(temp_folder.path(), true).unwrap();
        let owner_path = temp_folder.path().join("github.com").join("szabgab");
        assert!(owner_path.exists());
        assert!(owner_path.join("git-digger").exists());
    }
}