1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Patterns that recognise repository URLs on the supported hosting sites.
///
/// Every pattern accepts `http` or `https` and captures, in order:
/// 1. the host name,
/// 2. the owner (first path segment),
/// 3. the repository name (second path segment).
///
/// Anything after the repository segment is matched but not captured.
/// The dots in the host names are escaped so that they only match a literal
/// `.`; an unescaped `.` would accept any character in that position
/// (e.g. `githubxcom`).
const URL_REGEXES: [&str; 3] = [
    r"^https?://(github\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(gitlab\.com)/([^/]+)/([^/]+)/?.*$",
    r"^https?://(salsa\.debian\.org)/([^/]+)/([^/]+)/?.*$",
];
15
/// A source-code repository identified by hosting site, owner and name.
///
/// The three parts are what `url()` and `path()` are assembled from.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
pub struct Repository {
    /// Host name of the hosting site, e.g. `github.com`.
    host: String,
    /// Owner (user or organisation) segment of the repository URL.
    owner: String,
    /// Repository name segment of the repository URL.
    repo: String,
}
23
24#[allow(dead_code)]
25impl Repository {
26 pub fn new(host: &str, owner: &str, repo: &str) -> Self {
28 Self {
29 host: host.to_string(),
30 owner: owner.to_string(),
31 repo: repo.to_string(),
32 }
33 }
34
35 pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
43 static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
44 URL_REGEXES
45 .iter()
46 .map(|reg| Regex::new(reg).unwrap())
47 .collect::<Vec<Regex>>()
48 });
49
50 for re in REGS.iter() {
51 if let Some(repo_url) = re.captures(url) {
52 let host = repo_url[1].to_lowercase();
53 let owner = repo_url[2].to_lowercase();
54 let repo = repo_url[3].to_lowercase();
55 return Ok(Self { host, owner, repo });
56 }
57 }
58 Err(format!("No match for repo in '{}'", &url).into())
59 }
60
61 pub fn url(&self) -> String {
62 format!("https://{}/{}/{}", self.host, self.owner, self.repo)
63 }
64
65 pub fn path(&self, root: &Path) -> PathBuf {
66 self.owner_path(root).join(&self.repo)
67 }
68
69 pub fn owner_path(&self, root: &Path) -> PathBuf {
70 root.join(&self.host).join(&self.owner)
71 }
72
73 pub fn get_owner(&self) -> &str {
74 &self.owner
75 }
76
77 pub fn is_github(&self) -> bool {
78 &self.host == "github.com"
79 }
80
81 pub fn is_gitlab(&self) -> bool {
82 ["gitlab.com", "salsa.debian.org"].contains(&self.host.as_str())
83 }
84
85 pub fn has_github_actions(&self, root: &Path) -> bool {
86 if !self.is_github() {
87 return false;
88 }
89
90 let path = self.path(root);
91 let dot_github = path.join(".github");
92 if !dot_github.exists() {
93 return false;
94 }
95
96 let workflow_dir = dot_github.join("workflows");
97 if !workflow_dir.exists() {
98 return false;
99 }
100
101 if let Ok(entries) = workflow_dir.read_dir() {
102 let yaml_count = entries
103 .filter_map(|entry| entry.ok())
104 .filter(|entry| {
105 entry
106 .path()
107 .extension()
108 .and_then(|ext| ext.to_str())
109 .map(|ext| ext == "yml" || ext == "yaml")
110 .unwrap_or(false)
111 })
112 .count();
113 if yaml_count > 0 {
114 return true;
115 }
116 }
117
118 false
119 }
120
121 pub fn update_repository(
124 &self,
125 root: &Path,
126 clone: bool,
127 depth: Option<usize>,
128 ) -> Result<(), Box<dyn Error>> {
129 let owner_path = self.owner_path(root);
130 let current_dir = env::current_dir()?;
131 log::info!(
132 "Creating owner_path {:?} while current_dir is {:?}",
133 &owner_path,
134 ¤t_dir
135 );
136 fs::create_dir_all(&owner_path)?;
137 let repo_path = self.path(root);
138 if Path::new(&repo_path).exists() {
139 if clone {
140 log::info!("repo exist but we only clone now. Skipping.");
141 } else {
142 log::info!("repo exist; cd to {:?}", &repo_path);
143 env::set_current_dir(&repo_path)?;
144 self.git_pull();
145 }
146 } else {
147 log::info!("new repo; cd to {:?}", &owner_path);
148 env::set_current_dir(owner_path)?;
149 self.git_clone(depth);
150 }
151 env::set_current_dir(current_dir)?;
152 Ok(())
153 }
154
155 fn git_pull(&self) {
156 if !self.check_url() {
157 log::error!("Repository URL is not reachable: {}", self.url());
158 return;
159 }
160
161 let current_dir = env::current_dir().unwrap();
162 log::info!("git pull in {current_dir:?}");
163
164 match Command::new("git").arg("pull").output() {
165 Ok(result) => {
166 if result.status.success() {
167 log::info!(
168 "git_pull exit code: '{}' in folder {:?}",
169 result.status,
170 current_dir
171 );
172 } else {
173 log::warn!(
174 "git_pull exit code: '{}' in folder {:?}",
175 result.status,
176 current_dir
177 );
178 }
179 }
180 Err(err) => {
181 log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
182 }
183 }
184 }
185
186 fn git_clone(&self, depth: Option<usize>) {
187 if !self.check_url() {
188 log::error!("Repository URL is not reachable: {}", self.url());
189 return;
190 }
191
192 let current_dir = env::current_dir().unwrap();
193
194 let url = self.url();
195 log::info!("git clone {url} in {current_dir:?}");
196
197 let mut cmd = Command::new("git");
198 cmd.arg("clone");
199 if let Some(depth) = depth {
200 cmd.arg(format!("--depth={depth}"));
201 }
202 match cmd.arg(self.url()).output() {
203 Ok(result) => {
204 if result.status.success() {
205 log::info!("git_clone exit code: '{}'", result.status);
206 } else {
207 log::warn!(
208 "git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
209 result.status,
210 url,
211 );
212 }
213 }
214 Err(err) => {
215 log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
216 }
217 }
218 }
219
220 pub fn check_url(&self) -> bool {
221 let url = self.url();
222 let response = ureq::get(&url).call();
223 match response {
224 Ok(_) => true,
225 Err(err) => {
226 log::error!("Error checking URL '{}': {}", url, err);
227 false
228 }
229 }
230 }
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// URL parsing and the accessors derived from it.
    #[test]
    fn test_get_owner_and_repo() {
        let root = Path::new("/tmp");
        let rust_digger = Repository::new("github.com", "szabgab", "rust-digger");

        // Plain https URL.
        let plain = Repository::from_url("https://github.com/szabgab/rust-digger").unwrap();
        assert_eq!(plain, rust_digger);
        assert_eq!(plain.url(), "https://github.com/szabgab/rust-digger");
        assert_eq!(
            plain.path(root).to_str(),
            Some("/tmp/github.com/szabgab/rust-digger")
        );
        assert!(plain.is_github());
        assert!(!plain.is_gitlab());
        assert_eq!(plain.get_owner(), "szabgab");

        // A trailing slash is ignored.
        let trailing_slash =
            Repository::from_url("https://github.com/szabgab/rust-digger/").unwrap();
        assert_eq!(trailing_slash, rust_digger);
        assert_eq!(trailing_slash.url(), "https://github.com/szabgab/rust-digger");
        assert!(trailing_slash.is_github());

        // http is accepted, but the generated URL is https.
        let over_http = Repository::from_url("http://github.com/szabgab/rust-digger/").unwrap();
        assert_eq!(over_http, rust_digger);
        assert_eq!(over_http.url(), "https://github.com/szabgab/rust-digger");
        assert!(over_http.is_github());

        // Extra path segments after the repository name are dropped.
        let deep_link = Repository::from_url(
            "https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type",
        )
        .unwrap();
        assert_eq!(
            deep_link,
            Repository::new("github.com", "crypto-crawler", "crypto-crawler-rs")
        );
        assert_eq!(
            deep_link.url(),
            "https://github.com/crypto-crawler/crypto-crawler-rs"
        );
        assert!(deep_link.is_github());

        // gitlab.com
        let gitlab = Repository::from_url("https://gitlab.com/szabgab/rust-digger").unwrap();
        assert_eq!(
            gitlab,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
        assert_eq!(gitlab.url(), "https://gitlab.com/szabgab/rust-digger");
        assert!(!gitlab.is_github());
        assert!(gitlab.is_gitlab());

        // Owner and repository name are lowercased.
        let mixed_case = Repository::from_url("https://gitlab.com/Szabgab/Rust-digger/").unwrap();
        assert_eq!(
            mixed_case,
            Repository::new("gitlab.com", "szabgab", "rust-digger")
        );
        assert_eq!(mixed_case.url(), "https://gitlab.com/szabgab/rust-digger");
        assert_eq!(mixed_case.owner, "szabgab");
        assert_eq!(mixed_case.repo, "rust-digger");
        assert_eq!(
            mixed_case.path(root).to_str(),
            Some("/tmp/gitlab.com/szabgab/rust-digger")
        );

        // salsa.debian.org is treated as a GitLab host.
        let salsa = Repository::from_url("https://salsa.debian.org/szabgab/rust-digger/").unwrap();
        assert_eq!(
            salsa,
            Repository::new("salsa.debian.org", "szabgab", "rust-digger")
        );
        assert_eq!(salsa.url(), "https://salsa.debian.org/szabgab/rust-digger");
        assert_eq!(salsa.owner, "szabgab");
        assert_eq!(salsa.repo, "rust-digger");
        assert_eq!(
            salsa.path(root).to_str(),
            Some("/tmp/salsa.debian.org/szabgab/rust-digger")
        );
        assert!(!salsa.is_github());
        assert!(salsa.is_gitlab());

        // Unsupported host: parsing fails with a descriptive message.
        let error = Repository::from_url("https://blabla.com/").unwrap_err();
        assert_eq!(
            error.to_string(),
            "No match for repo in 'https://blabla.com/'"
        );
    }

    /// `check_url` accepts the URL of this existing repository.
    #[test]
    fn test_check_good_url() {
        let existing = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        assert!(existing.check_url());
    }

    /// `check_url` rejects the URL of a repository that does not exist.
    #[test]
    fn test_check_missing_url() {
        let missing = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        assert!(!missing.check_url());
    }

    /// Updating a missing repository succeeds, creates the owner directory
    /// and leaves no repository directory behind.
    #[test]
    fn test_clone_missing_repo() {
        let temp_folder = tempfile::tempdir().unwrap();
        let missing = Repository::from_url("https://github.com/szabgab/no-such-repo").unwrap();
        missing
            .update_repository(temp_folder.path(), true, None)
            .unwrap();

        let owner_dir = temp_folder.path().join("github.com").join("szabgab");
        assert!(owner_dir.exists());
        assert!(!owner_dir.join("no-such-repo").exists());
    }

    /// Updating an existing repository creates its directory below the owner
    /// directory.
    #[test]
    fn test_clone_this_repo() {
        let temp_folder = tempfile::tempdir().unwrap();
        let existing = Repository::from_url("https://github.com/szabgab/git-digger").unwrap();
        existing
            .update_repository(temp_folder.path(), true, None)
            .unwrap();

        let owner_dir = temp_folder.path().join("github.com").join("szabgab");
        assert!(owner_dir.exists());
        assert!(owner_dir.join("git-digger").exists());
    }
}