1use std::env;
2use std::error::Error;
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::process::Command;
6
7use once_cell::sync::Lazy;
8use regex::Regex;
9
/// Patterns that recognise web URLs of the supported hosting sites.
///
/// Every pattern has three capture groups, in this order: host name, owner and
/// repository name. Whatever follows the repository name is matched but not captured.
///
/// * `(?i)` makes the match case-insensitive, so a mixed-case scheme or host name is
///   accepted as well.
/// * The dots of the host names are escaped; an unescaped `.` would match any character
///   and let look-alike hosts such as `githubXcom` through.
/// * The repository group stops at `?` and `#`, so a query string or fragment does not
///   become part of the repository name.
const URL_REGEXES: [&str; 3] = [
    r"(?i)^https?://(github\.com)/([^/]+)/([^/?#]+)/?.*$",
    r"(?i)^https?://(gitlab\.com)/([^/]+)/([^/?#]+)/?.*$",
    r"(?i)^https?://(salsa\.debian\.org)/([^/]+)/([^/?#]+)/?.*$",
];
15
/// A source-code repository identified by hosting site, owner and repository name.
///
/// Values produced by `Repository::from_url` hold all three parts lower-cased.
// `Eq` is derived next to `PartialEq` because every field is a `String`; `Clone` lets
// callers keep a copy without re-parsing the URL.
// NOTE(review): the reason for `allow(dead_code)` is not visible in this file; presumably
// the fields are not read by every target that compiles this module - confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
pub struct Repository {
    /// Host name of the hosting site, e.g. `github.com`.
    host: String,
    /// Account (user or organisation) that owns the repository.
    owner: String,
    /// Name of the repository below the owner.
    repo: String,
}
23
24#[allow(dead_code)]
25impl Repository {
26 fn new(host: &str, owner: &str, repo: &str) -> Self {
28 Self {
29 host: host.to_string(),
30 owner: owner.to_string(),
31 repo: repo.to_string(),
32 }
33 }
34
35 pub fn from_url(url: &str) -> Result<Self, Box<dyn Error>> {
43 static REGS: Lazy<Vec<Regex>> = Lazy::new(|| {
44 URL_REGEXES
45 .iter()
46 .map(|reg| Regex::new(reg).unwrap())
47 .collect::<Vec<Regex>>()
48 });
49
50 for re in REGS.iter() {
51 if let Some(repo_url) = re.captures(url) {
52 let host = repo_url[1].to_lowercase();
53 let owner = repo_url[2].to_lowercase();
54 let repo = repo_url[3].to_lowercase();
55 return Ok(Self { host, owner, repo });
56 }
57 }
58 Err(format!("No match for repo in '{}'", &url).into())
59 }
60
61 pub fn url(&self) -> String {
62 format!("https://{}/{}/{}", self.host, self.owner, self.repo)
63 }
64
65 pub fn path(&self, root: &Path) -> PathBuf {
66 self.owner_path(root).join(&self.repo)
67 }
68
69 pub fn owner_path(&self, root: &Path) -> PathBuf {
70 root.join(&self.host).join(&self.owner)
71 }
72
73 pub fn is_github(&self) -> bool {
74 &self.host == "github.com"
75 }
76
77 pub fn is_gitlab(&self) -> bool {
78 ["gitlab.com", "salsa.debian.org"].contains(&self.host.as_str())
79 }
80
81 pub fn update_repository(&self, root: &Path, clone: bool) -> Result<(), Box<dyn Error>> {
84 let owner_path = self.owner_path(root);
85 let current_dir = env::current_dir()?;
86 log::info!(
87 "Creating owner_path {:?} while current_dir is {:?}",
88 &owner_path,
89 ¤t_dir
90 );
91 fs::create_dir_all(&owner_path)?;
92 let repo_path = self.path(root);
93 if Path::new(&repo_path).exists() {
94 if clone {
95 log::info!("repo exist but we only clone now. Skipping.");
96 } else {
97 log::info!("repo exist; cd to {:?}", &repo_path);
98 env::set_current_dir(&repo_path)?;
99 self.git_pull();
100 }
101 } else {
102 log::info!("new repo; cd to {:?}", &owner_path);
103 env::set_current_dir(owner_path)?;
104 self.git_clone();
105 }
106 env::set_current_dir(current_dir)?;
107 Ok(())
108 }
109
110 fn git_pull(&self) {
111 if !self.check_url() {
112 log::error!("Repository URL is not reachable: {}", self.url());
113 return;
114 }
115
116 let current_dir = env::current_dir().unwrap();
117 log::info!("git pull in {current_dir:?}");
118
119 match Command::new("git").arg("pull").output() {
120 Ok(result) => {
121 if result.status.success() {
122 log::info!(
123 "git_pull exit code: '{}' in folder {:?}",
124 result.status,
125 current_dir
126 );
127 } else {
128 log::warn!(
129 "git_pull exit code: '{}' in folder {:?}",
130 result.status,
131 current_dir
132 );
133 }
134 }
135 Err(err) => {
136 log::error!("Could not run git_pull in folder {current_dir:?} error: {err}")
137 }
138 }
139 }
140
141 fn git_clone(&self) {
142 if !self.check_url() {
143 log::error!("Repository URL is not reachable: {}", self.url());
144 return;
145 }
146
147 let current_dir = env::current_dir().unwrap();
148
149 let url = self.url();
150 log::info!("git clone {url} in {current_dir:?}");
151
152 match Command::new("git").arg("clone").arg(self.url()).output() {
153 Ok(result) => {
154 if result.status.success() {
155 log::info!("git_clone exit code: '{}'", result.status);
156 } else {
157 log::warn!(
158 "git_clone exit code: '{}' for url '{}' in '{current_dir:?}'",
159 result.status,
160 url,
161 );
162 }
163 }
164 Err(err) => {
165 log::error!("Could not run `git clone {url}` in {current_dir:?} error: {err}")
166 }
167 }
168 }
169
170 fn check_url(&self) -> bool {
171 let url = self.url();
172 match reqwest::blocking::get(&url) {
173 Ok(_) => true,
174 Err(err) => {
175 log::error!("Error checking URL '{}': {}", url, err);
176 false
177 }
178 }
179 }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses URLs of every supported host (plus one unsupported URL) and checks the
    /// parsed parts, the canonical URL, the on-disk path and the host predicates.
    #[test]
    fn test_get_owner_and_repo() {
        let root = Path::new("/tmp");
        let parse = |url: &str| Repository::from_url(url).unwrap();

        // GitHub: the plain URL.
        let github = Repository::new("github.com", "szabgab", "rust-digger");
        let parsed = parse("https://github.com/szabgab/rust-digger");
        assert_eq!(parsed, github);
        assert_eq!(parsed.url(), "https://github.com/szabgab/rust-digger");
        assert_eq!(
            parsed.path(root).to_str(),
            Some("/tmp/github.com/szabgab/rust-digger")
        );
        assert!(parsed.is_github());
        assert!(!parsed.is_gitlab());

        // GitHub: a trailing slash and the http scheme lead to the same repository.
        for variant in [
            "https://github.com/szabgab/rust-digger/",
            "http://github.com/szabgab/rust-digger/",
        ] {
            let parsed = parse(variant);
            assert_eq!(parsed, github);
            assert_eq!(parsed.url(), "https://github.com/szabgab/rust-digger");
            assert!(parsed.is_github());
        }

        // GitHub: path segments below the repository are dropped.
        let parsed =
            parse("https://github.com/crypto-crawler/crypto-crawler-rs/tree/main/crypto-market-type");
        assert_eq!(
            parsed,
            Repository::new("github.com", "crypto-crawler", "crypto-crawler-rs")
        );
        assert_eq!(
            parsed.url(),
            "https://github.com/crypto-crawler/crypto-crawler-rs"
        );
        assert!(parsed.is_github());

        // GitLab: the plain URL.
        let gitlab = Repository::new("gitlab.com", "szabgab", "rust-digger");
        let parsed = parse("https://gitlab.com/szabgab/rust-digger");
        assert_eq!(parsed, gitlab);
        assert_eq!(parsed.url(), "https://gitlab.com/szabgab/rust-digger");
        assert!(!parsed.is_github());
        assert!(parsed.is_gitlab());

        // GitLab: owner and repository are lower-cased.
        let parsed = parse("https://gitlab.com/Szabgab/Rust-digger/");
        assert_eq!(parsed, gitlab);
        assert_eq!(parsed.url(), "https://gitlab.com/szabgab/rust-digger");
        assert_eq!(parsed.owner, "szabgab");
        assert_eq!(parsed.repo, "rust-digger");
        assert_eq!(
            parsed.path(root).to_str(),
            Some("/tmp/gitlab.com/szabgab/rust-digger")
        );

        // Salsa counts as a GitLab host.
        let parsed = parse("https://salsa.debian.org/szabgab/rust-digger/");
        assert_eq!(
            parsed,
            Repository::new("salsa.debian.org", "szabgab", "rust-digger")
        );
        assert_eq!(parsed.url(), "https://salsa.debian.org/szabgab/rust-digger");
        assert_eq!(parsed.owner, "szabgab");
        assert_eq!(parsed.repo, "rust-digger");
        assert_eq!(
            parsed.path(root).to_str(),
            Some("/tmp/salsa.debian.org/szabgab/rust-digger")
        );
        assert!(!parsed.is_github());
        assert!(parsed.is_gitlab());

        // An unsupported host is rejected with a message that quotes the URL.
        let failure = Repository::from_url("https://blabla.com/");
        assert!(failure.is_err());
        let message = failure.unwrap_err().to_string();
        assert_eq!(message, "No match for repo in 'https://blabla.com/'");
    }
}