glit_core/
repo.rs

1use crate::{
2    config::RepositoryConfig,
3    log::Log,
4    types::{AuthorName, BranchName},
5};
6use ahash::{HashMap, HashMapExt};
7use git2::{build::RepoBuilder, BranchType, Oid};
8use git2::{FetchOptions, RemoteCallbacks};
9
10use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
11
12use rand::distributions::{Alphanumeric, DistString};
13use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
14use reqwest::Url;
15use serde::{Deserialize, Serialize};
16use std::{
17    collections::BTreeMap,
18    fs::remove_dir_all,
19    path::{Path, PathBuf},
20    str::FromStr,
21    sync::{Arc, Mutex},
22    time::Instant,
23};
24
/// Base directory under which every temporary bare clone is created.
const DEFAULT_PATH: &str = "/tmp";
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct Repository {
29    pub name: String,
30    pub owner: String,
31    branches: Vec<BranchName>,
32    #[serde(skip)]
33    clone_paths: Vec<PathBuf>,
34    pub branch_data: HashMap<BranchName, Committers>,
35}
36
37pub struct RepositoryFactory {
38    all_branches: bool,
39    branches: Vec<BranchName>,
40    url: Url,
41    //mpb: Arc<Mutex<MultiProgress>>,
42}
43
44impl RepositoryFactory {
45    pub fn with_config(repository_config: RepositoryConfig) -> Self {
46        let url = repository_config.url;
47        let all_branches: bool = repository_config.all_branches;
48
49        //let mpb = Arc::new(Mutex::new(MultiProgress::new()));
50
51        RepositoryFactory {
52            all_branches,
53            url,
54            branches: Vec::<BranchName>::new(),
55            //mpb,
56        }
57    }
58
59    fn get_head_branch(repo: &git2::Repository) -> String {
60        let head = repo.head();
61        if let Ok(head_ref) = head {
62            head_ref
63                .name()
64                .unwrap()
65                .split('/')
66                .last()
67                .unwrap()
68                .to_string()
69        } else {
70            "".to_string()
71        }
72    }
73
74    pub fn fetch_branches(repository: &git2::Repository, head: &str) -> Vec<BranchName> {
75        let mut branches = repository
76            .branches(Some(BranchType::Remote))
77            .unwrap()
78            .map(|b| {
79                let branch = b.unwrap().0;
80                let branch_name = branch.name().unwrap().unwrap();
81                let string_branch = branch_name.split("origin/").last().unwrap().to_string();
82                BranchName(string_branch)
83            })
84            .collect::<Vec<_>>();
85
86        branches.retain(|value| *value != BranchName("HEAD".to_string()));
87        branches.retain(|value| *value != BranchName(head.to_string())); // Do not clone default branch two time
88
89        branches
90    }
91
92    pub fn prepare_branch(branches: Vec<BranchName>) -> Vec<BranchName> {
93        branches
94            .iter()
95            .map(|branch| BranchName(branch.to_string().replace('/', "_")))
96            .collect::<Vec<_>>()
97    }
98
99    fn clone(
100        url: &Url,
101        repo_name: String,
102        path: &Path,
103        //mpb: Arc<Mutex<MultiProgress>>,
104    ) -> Result<git2::Repository, git2::Error> {
105        let pb_clone = ProgressBar::new(0);
106        let pb_delta = ProgressBar::new(0);
107
108        //mpb.lock().unwrap().add(pb_clone.clone());
109
110        let style_clone = ProgressStyle::with_template(
111            "🚧 CLONING    {msg}[{elapsed_precise}] [{wide_bar:.cyan/blue}] {human_pos}/{human_len} ",
112        )
113        .unwrap()
114        .progress_chars("#>-");
115        
116        let style_delta = ProgressStyle::with_template(
117            "🚀 RESOLVING  {msg}[{elapsed_precise}] [{wide_bar:.cyan/blue}] {human_pos}/{human_len} ",
118        )
119        .unwrap()
120        .progress_chars("#>-");
121        
122        pb_clone.set_style(style_clone);
123        pb_delta.set_style(style_delta);
124        let cb = create_multi_callback(repo_name, "default".to_string(), pb_clone, pb_delta ); //  , mpb
125
126        let mut fo = FetchOptions::new();
127        fo.remote_callbacks(cb);
128
129        let repo = RepoBuilder::new()
130            .bare(true)
131            .fetch_options(fo)
132            .clone(url.as_str(), path);
133
134        match repo {
135            Ok(_) => log::debug!("Cloning repo at {:?}", path.to_str().unwrap()),
136            Err(_) => {
137                log::error!("Failed to clone")
138            }
139        }
140
141        repo
142    }
143
144    fn clone_branches(
145        url: Url,
146        repo_name: String,
147        branches: Vec<BranchName>,
148        //mpb: Arc<Mutex<MultiProgress>>,
149    ) -> Vec<PathBuf> {
150        let repo_name = repo_name.replace('-', "_");
151
152        branches
153            .par_iter()
154            .map(|branch| {
155                let hash_suffix = Alphanumeric.sample_string(&mut rand::thread_rng(), 6);
156                let hashed_repo_name = format!("{}_{}", repo_name, hash_suffix);
157
158                let path = format!(
159                    "{}/{}/{}",
160                    DEFAULT_PATH,
161                    hashed_repo_name,
162                    branch.to_string(),
163                );
164
165                let branch_clone_path = PathBuf::from_str(&path).unwrap();
166
167                let pb_clone = ProgressBar::new(0);
168                let pb_delta = ProgressBar::new(0);
169
170                let style_clone = ProgressStyle::with_template(
171                    "🚧 CLONING    {msg}[{elapsed_precise}] [{wide_bar:.cyan/blue}] {human_pos}/{human_len} ",
172                )
173                .unwrap()
174                .progress_chars("#>-");
175                
176                let style_delta = ProgressStyle::with_template(
177                    "🚀 RESOLVING  {msg}[{elapsed_precise}] [{wide_bar:.cyan/blue}] {human_pos}/{human_len} ",
178                )
179                .unwrap()
180                .progress_chars("#>-");
181                
182                pb_clone.set_style(style_clone);
183                pb_delta.set_style(style_delta);
184                let cb = create_multi_callback(
185                    repo_name.clone(),
186                    branch.to_string(),
187                    pb_clone,
188                    pb_delta,
189                ); //,mpb
190
191                let mut fo = FetchOptions::new();
192                fo.remote_callbacks(cb);
193
194                let repo = RepoBuilder::new()
195                    .bare(true)
196                    .fetch_options(fo)
197                    .branch(&branch.to_string())
198                    .clone(url.clone().as_str(), &branch_clone_path);
199
200                match repo {
201                    Ok(_) => log::debug!(
202                        "[{:?}] Cloning branch : {:?} at {}",
203                        repo_name,
204                        branch,
205                        path
206                    ),
207                    Err(_) => {
208                        log::error!("Failed to clone {} with branch {:?}", repo_name, branch)
209                    }
210                }
211
212                branch_clone_path
213            })
214            .collect::<Vec<PathBuf>>()
215    }
216
217    pub fn create(mut self, mpb: Arc<Mutex<MultiProgress>>) -> Repository {
218        let mut path_segments = self.url.path_segments().unwrap();
219        let owner = path_segments.next().unwrap().to_string();
220        let repo_name = path_segments.next().unwrap().to_string();
221
222        // default location
223        let hash_suffix = Alphanumeric.sample_string(&mut rand::thread_rng(), 6);
224        let hashed_repo_name = format!("{}_{}", repo_name, hash_suffix);
225        let clone_location = PathBuf::from_str(&format!(
226            "{}/{}/{}",
227            DEFAULT_PATH, hashed_repo_name, "default"
228        ))
229        .unwrap();
230
231        let mut clone_paths: Vec<PathBuf> = Vec::new();
232        let repo = Self::clone(
233            &self.url,
234            repo_name.clone(),
235            clone_location.as_path(),
236            //mpb,
237        )
238        .unwrap();
239
240        let head = Self::get_head_branch(&repo);
241        if !head.is_empty() {
242            clone_paths.push(clone_location);
243        }
244
245        // Clone all branches
246        if self.all_branches {
247            let mut branches = Self::fetch_branches(&repo, &head);
248            let paths = Self::clone_branches(
249                self.url.clone(),
250                repo_name.clone(),
251                branches.clone(),
252                //self.mpb,
253            );
254
255            branches.push(BranchName(head));
256            self.branches = branches.clone();
257
258            clone_paths.extend(paths);
259        }
260        // Clone only default branch
261        else {
262            self.branches = vec![BranchName(head)];
263        }
264
265        Repository {
266            name: repo_name,
267            owner,
268            branches: self.branches.clone(),
269            clone_paths,
270            branch_data: HashMap::new(),
271        }
272    }
273}
274
275impl Repository {
276    pub fn extract_log(mut self) -> Repository {
277        self.branch_data = self
278            .branches
279            .clone()
280            .into_iter()
281            .zip(self.clone_paths.clone())
282            .map(|(br, pt)| {
283                let t1 = Instant::now();
284
285                let repo_data: Committers =
286                    Log::build(pt.clone(), self.name.clone(), br.to_string());
287
288                log::info!("Build log Time : {:?}", t1.elapsed());
289
290                let remove_path = pt.parent().unwrap();
291                let removal = remove_dir_all(remove_path);
292                match removal {
293                    Ok(_) => log::debug!("Cleaning - Delete folder at {:?}", &remove_path),
294                    Err(_) => log::error!("Failed to delete at {:?}", &remove_path),
295                }
296
297                (br, repo_data)
298            })
299            .collect::<HashMap<_, _>>();
300
301        self
302    }
303}
304
305type Mail = String;
306
307#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
308pub struct Committer {
309    pub mails: BTreeMap<Mail, Vec<Mail>>,
310}
311
312impl Committer {
313    pub fn new(mail: Mail, commit_id: String) -> Self {
314        let mut commits_for_mail = BTreeMap::new();
315        commits_for_mail.insert(mail, vec![commit_id]);
316
317        Self {
318            mails: commits_for_mail,
319        }
320    }
321}
322
323#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
324pub struct Committers {
325    pub committers: HashMap<AuthorName, Committer>,
326}
327
328impl Default for Committers {
329    fn default() -> Self {
330        Self::new()
331    }
332}
333
334impl Committers {
335    pub fn new() -> Self {
336        Self {
337            committers: HashMap::<AuthorName, Committer>::new(),
338        }
339    }
340
341    pub fn update(&mut self, repo: &git2::Repository, commit_id: Oid) -> &Self {
342        log::debug!("Looking in commit {}", commit_id);
343
344        let commit = repo.find_commit(commit_id).unwrap();
345        let commit_sigature = commit.author();
346        let author: AuthorName = AuthorName(commit_sigature.name().unwrap_or("").to_string());
347        let mail = commit_sigature.email().unwrap_or("").to_string();
348
349        self.committers
350            .entry(author)
351            .and_modify(|committer| {
352                // Author key exist. Need to modify it.
353                committer
354                    .mails
355                    .entry(mail.clone())
356                    .and_modify(|commit_ids| {
357                        // Mail Key exist
358                        commit_ids.push(commit_id.to_string());
359                    })
360                    .or_insert_with(||
361                        // Mail Key do not exist
362                        vec![commit_id.to_string()]);
363            })
364            .or_insert_with(||
365                // Author Key do not exist
366                Committer::new(mail, commit_id.to_string()));
367
368        // A little bit faster but not cleaner
369        //let committer = Committer::new(mail.clone(), commit_id.to_string());
370        //if self.committers.contains_key(&author) {
371        //    let mut existing_commiter = self.committers.get_mut(&author).unwrap().to_owned();
372        //
373        //    if !existing_commiter.mails.contains_key(&mail) {
374        //        existing_commiter.mails.insert(mail.clone(), vec![]);
375        //
376        //        self.committers.insert(author.clone(), existing_commiter);
377        //    }
378        //
379        //    // Update commit_id list
380        //    let mut actual_committer = self.committers.get_mut(&author).unwrap().to_owned();
381        //    let mut commit_ids = actual_committer.mails.get_mut(&mail).unwrap().to_owned();
382        //
383        //    commit_ids.push(commit_id.to_string());
384        //    actual_committer.mails.insert(mail, commit_ids);
385        //
386        //    // insert modified version of commiter
387        //    self.committers.insert(author, actual_committer);
388        //} else {
389        //    self.committers.insert(author.clone(), committer);
390        //}
391
392        self
393    }
394}
395
396fn create_multi_callback(
397    repo_name: String,
398    branch_name: String,
399    pb_clone: ProgressBar,
400    pb_delta: ProgressBar,
401) -> RemoteCallbacks<'static> {
402    let mut cb = RemoteCallbacks::new();
403
404    let mut is_clone_finished = false;
405    let mut is_delta_finished = false;
406    let mut delta_length_is_set = false;
407
408    cb.transfer_progress(move |stats| {
409        if stats.received_objects() == 0 {
410            pb_clone.set_message(format!("[{}][{}]", repo_name, branch_name));
411            pb_clone.set_length(stats.total_objects().try_into().unwrap());
412        }
413
414        if stats.indexed_deltas() > 0 && !delta_length_is_set {
415            pb_delta.set_message(format!("[{}][{}]", repo_name, branch_name));
416            pb_delta.set_length(stats.total_deltas().try_into().unwrap());
417            delta_length_is_set = true;
418        }
419
420        if (stats.received_objects() <= stats.total_objects()) && !is_clone_finished {
421            pb_clone.set_position(stats.received_objects().try_into().unwrap());
422            pb_clone.tick();
423            if stats.received_objects() == stats.total_objects() {
424                pb_clone.finish_with_message(format!(
425                    "[{} ✅][{} ✅]",
426                    repo_name.clone(),
427                    branch_name.clone()
428                ));
429                pb_clone.finish_and_clear();
430                is_clone_finished = true;
431            }
432        }
433
434        if (stats.indexed_deltas() <= stats.total_deltas())
435            && stats.total_deltas() > 0
436            && is_clone_finished
437            && !is_delta_finished
438        {
439            pb_delta.set_position(stats.indexed_deltas().try_into().unwrap());
440
441            if stats.indexed_deltas() == stats.total_deltas() {
442                pb_delta.finish_with_message(format!(
443                    "[{} ✅][{} ✅]",
444                    repo_name.clone(),
445                    branch_name.clone()
446                ));
447                is_delta_finished = true;
448            }
449        }
450
451        true
452    });
453
454    cb
455}
456
457//fn print(state: &mut State, pb_clone: &mut ProgressBar, pb_delta: &mut ProgressBar) {
458//    let stats = state.progress.as_ref().unwrap();
459//    let network_pct = (100 * stats.received_objects()) / stats.total_objects();
460//    let index_pct = (100 * stats.indexed_objects()) / stats.total_objects();
461//    let kbytes = stats.received_bytes() / 1024;
462//
463//    if stats.indexed_deltas() < stats.total_deltas() {
464//        pb_delta.set_position(stats.indexed_deltas().try_into().unwrap());
465//        //io::stdout().flush().unwrap();
466//    }
467//
468//    if stats.received_objects() < stats.total_objects() {
469//        pb_clone.set_position(stats.received_objects().try_into().unwrap());
470//        //io::stdout().flush().unwrap();
471//    }
472//}