git_fetch_commits/
lib.rs

1use byte_unit::{Byte, UnitType};
2use clap::Parser;
3use git2::{Cred, Diff, RemoteCallbacks, Sort};
4use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
5use serde::Serialize;
6use std::{cell::Cell, str::FromStr};
7use tempfile::tempdir;
8
9#[derive(Serialize, Clone, Debug)]
10struct FileChange {
11    path: String,
12    lines_added: u32,
13    lines_removed: u32,
14    lines_modified: u32,
15    hunks_added: u32,
16    hunks_removed: u32,
17    hunks_modified: u32,
18}
19
20#[derive(Serialize, Debug, Clone)]
21enum CommitType {
22    Normal,
23    Merge,
24}
25
26#[derive(Serialize, Debug)]
27struct Commit {
28    id: String,
29    parents: Vec<String>,
30    repo_url: String,
31    timestamp: i64,
32    author_name: String,
33    author_email: String,
34    message: String,
35    r#type: CommitType,
36    changes: Vec<FileChange>,
37}
38
39#[derive(Serialize, Debug)]
40struct FlatCommit {
41    id: String,
42    parents: Vec<String>,
43    repo_url: String,
44    timestamp: i64,
45    author_name: String,
46    author_email: String,
47    message: String,
48    r#type: CommitType,
49    path: String,
50    lines_added: u32,
51    lines_removed: u32,
52    lines_modified: u32,
53    hunks_added: u32,
54    hunks_removed: u32,
55    hunks_modified: u32,
56}
57
58fn extract_from_diff(diff: &Diff) -> Result<Vec<FileChange>, git2::Error> {
59    // diff.foreach works in a very imperative way, looping through the diffs
60    // and calling callbacks in serial until it's complete
61    //
62    let mut files: Vec<FileChange> = Vec::new();
63
64    // Using a cell so we can modify the captured FileChange via the multiple closures below
65    // without ownership issues
66    //
67    let x: Cell<Option<FileChange>> = Cell::new(None);
68
69    diff.foreach(
70        &mut |diff_delta, _s| {
71            match x.take() {
72                // If we're currently busy with a file, this means we're moving on so we
73                // should push it and replace it with the new one
74                //
75                Some(file_change) => {
76                    files.push(file_change);
77                }
78                _ => {}
79            }
80            let filename = diff_delta.new_file().path().unwrap().to_str().unwrap();
81
82            x.set(Some(FileChange {
83                path: String::from_str(filename).unwrap(),
84                lines_added: 0,
85                lines_removed: 0,
86                lines_modified: 0,
87                hunks_added: 0,
88                hunks_removed: 0,
89                hunks_modified: 0,
90            }));
91            true
92        },
93        None,
94        Some(&mut |_diff_delta, diff_hunk| {
95            // Guaranteed to be processing a file (big assumption?)
96            //
97            let state = x.take().unwrap();
98            let updated = match (diff_hunk.old_lines(), diff_hunk.new_lines()) {
99                (0, _) => FileChange {
100                    hunks_added: state.hunks_added + 1,
101                    ..state
102                },
103                (_, 0) => FileChange {
104                    hunks_removed: state.hunks_removed + 1,
105                    ..state
106                },
107                (_, _) => FileChange {
108                    hunks_modified: state.hunks_modified + 1,
109                    ..state
110                },
111            };
112            x.set(Some(updated));
113            true
114        }),
115        Some(&mut |_diff_delta, _diff_hunk, diff_line| {
116            // Guaranteed to be processing a file (big assumption?)
117            //
118            let state = x.take().unwrap();
119            let updated = match (diff_line.old_lineno(), diff_line.new_lineno()) {
120                (None, Some(_)) => FileChange {
121                    lines_added: state.lines_added + 1,
122                    ..state
123                },
124                (Some(_), None) => FileChange {
125                    lines_removed: state.lines_removed + 1,
126                    ..state
127                },
128                (Some(_), Some(_)) => FileChange {
129                    lines_modified: state.lines_modified + 1,
130                    ..state
131                },
132                // Both being None is weird... don't think possible?
133                _ => state,
134            };
135            x.set(Some(updated));
136            true
137        }),
138    )?;
139
140    // Once we fall out the bottom we're done and we should push the final file update to were processing
141    //
142    match x.take() {
143        Some(file_change) => {
144            files.push(file_change);
145        }
146        _ => {}
147    }
148
149    Ok(files)
150}
151
152pub fn extract_logs(args: &Args) -> Result<(), git2::Error> {
153    let multiprogress = MultiProgress::new();
154
155    let sty = ProgressStyle::with_template(
156        "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
157    )
158    .unwrap()
159    .progress_chars("#>-");
160
161    let progress_client = multiprogress.add(ProgressBar::new(1));
162    let progress_server = multiprogress.add(ProgressBar::new(1));
163    let progress_transfer = multiprogress.add(ProgressBar::new(1));
164    let progress_indexing = multiprogress.add(ProgressBar::new(1));
165    progress_client.set_style(sty.clone());
166    progress_client.set_message("Client progress");
167    progress_server.set_style(sty.clone());
168    progress_server.set_message("Server progress");
169    progress_transfer.set_style(sty.clone());
170    progress_transfer.set_message("Transfer progress");
171    progress_indexing.set_style(sty.clone());
172    progress_indexing.set_message("Indexing progress");
173
174    let mut callbacks = RemoteCallbacks::new();
175
176    callbacks.credentials(|_url, username_from_url, allowed_types| {
177        // eprintln!(
178        //     "Credentials callback for url={} username={} allowed={:?}",
179        //     url,
180        //     username_from_url.unwrap_or("none"),
181        //     allowed_types
182        // );
183
184        progress_client.set_message("Authenticating...");
185
186        if allowed_types.is_ssh_key() {
187            // Provide ssh key from current agent
188            //
189            Cred::ssh_key_from_agent(username_from_url.unwrap_or("none"))
190        } else if allowed_types.is_user_pass_plaintext() {
191            // Provide plaintext username / password if provided in args
192            //
193            match (
194                args.plaintext_username.as_ref(),
195                args.plaintext_password.as_ref(),
196            ) {
197                (Some(username), Some(password)) => Cred::userpass_plaintext(username, password),
198                _ => {
199                    eprintln!("Asked for plaintext credentials but none provided - maybe add -U and -P options?");
200                    Cred::default()
201                }
202            }
203        } else {
204            eprintln!("Unsupported credential type - returning default (which probably will fail)");
205            Cred::default()
206        }
207    });
208
209    callbacks.transfer_progress(|progress| {
210        let received_byte = Byte::from_u128(u128::try_from(progress.received_bytes()).unwrap())
211            .unwrap()
212            .get_appropriate_unit(UnitType::Binary);
213
214        // eprintln!(
215        //     "Progress => Received {} of {}, indexed {}, bytes {}",
216        //     progress.received_objects(),
217        //     progress.total_objects(),
218        //     progress.indexed_objects(),
219        //     adjusted_byte.get_appropriate_unit(UnitType::Binary)
220        // );
221
222        progress_transfer.set_length(progress.total_objects() as u64);
223        progress_transfer.set_position(progress.received_objects() as u64);
224        progress_transfer.set_message(format!("Tranferred {received_byte:.2}"));
225
226        progress_indexing.set_length(progress.total_objects() as u64);
227        progress_indexing.set_position(progress.indexed_objects() as u64);
228
229        true
230    });
231
232    callbacks.pack_progress(|pack_builder_stage, current, total| {
233        eprintln!(
234            "Packing => Stage {:?}, b {}, c {}",
235            pack_builder_stage, current, total
236        );
237        ()
238    });
239
240    callbacks.sideband_progress(|sb| {
241        let msg = format!(
242            "Server: {}",
243            String::from_utf8_lossy(sb)
244                .to_string()
245                .replace("\n", "")
246                .replace("\r", "")
247        );
248        progress_server.set_message(msg);
249        true
250    });
251
252    let mut fo = git2::FetchOptions::new();
253    fo.remote_callbacks(callbacks);
254
255    let mut builder = git2::build::RepoBuilder::new();
256    builder.fetch_options(fo);
257
258    let temp_dir = tempdir().map_err(|_e| git2::Error::from_str("TempDir failed!"))?;
259    // eprintln!("Using tempdir => {}", temp_dir.path().to_str().unwrap());
260    progress_client.set_message("Cloning...");
261    let repo = builder.clone(args.repo_url.as_str(), &temp_dir.path())?;
262    progress_client.set_message("Clone complete");
263
264    // Create the revwalk
265    //
266    let mut revwalk = repo.revwalk()?;
267    revwalk.set_sorting(Sort::TIME)?;
268    // eprintln!("Adding head");
269    revwalk.push_head()?;
270
271    // Complete all progress bars
272    //
273    progress_client.finish();
274    progress_server.finish();
275    progress_transfer.finish();
276    progress_indexing.finish();
277
278    // Add all branches to the revwalk
279    //
280    let branches = repo.branches(None)?;
281    for branch_r in branches {
282        if let Ok((branch, _branch_type)) = branch_r {
283            if !branch.is_head() {
284                // eprintln!(
285                //     "Adding branch => {} {:?}",
286                //     branch.name()?.unwrap_or("unnamed"),
287                //     branch_type
288                // );
289                if let Some(target) = branch.get().target() {
290                    revwalk.push(target)?;
291                } else {
292                    // eprintln!("No valid oid...");
293                }
294            }
295        }
296    }
297
298    while let Some(Ok(oid)) = revwalk.next() {
299        let commit = repo.find_commit(oid)?;
300        let commit_tree = repo.find_tree(commit.tree_id()).unwrap();
301
302        // println!(
303        //     "Oid => {}, Author => {} {}, Message => {}",
304        //     oid,
305        //     author.name().unwrap_or("unknown"),
306        //     author.email().unwrap_or("unknown"),
307        //     commit.message().unwrap_or("unknown")
308        // );
309
310        // ignore any commits which have more than 1 parent (i.e. a merge)
311        //
312        let parent_commit = if commit.parent_count() == 0 {
313            // Its the origin commit (the seed of the tree)
314            //
315            None
316        } else {
317            Some(commit.parent(0).unwrap().tree_id())
318        };
319
320        let parent_tree = parent_commit.map(|oid| repo.find_tree(oid).unwrap());
321
322        let default_commit = Commit {
323            id: oid.to_string(),
324            parents: commit.parents().map(|p| p.id().to_string()).collect(),
325            r#type: CommitType::Normal,
326            repo_url: args.repo_url.to_string(),
327            timestamp: commit.time().seconds(),
328            author_name: commit.author().name().unwrap_or("unknown").to_string(),
329            author_email: commit.author().email().unwrap_or("unknown").to_string(),
330            message: commit.message().unwrap_or("unknown").to_string(),
331            changes: Vec::new(),
332        };
333
334        // If it's a merge type (multiple parents), then we ignore the file changes
335        //
336        let my_commit = if commit.parent_count() > 1 {
337            Commit {
338                r#type: CommitType::Merge,
339                ..default_commit
340            }
341        }
342        // If it's a normal single (or zero) parent commit, then we process the diff
343        // to the parent to grab file changes
344        //
345        else {
346            let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&commit_tree), None)?;
347            let file_changes = extract_from_diff(&diff)?;
348            Commit {
349                r#type: CommitType::Normal,
350                changes: file_changes,
351                ..default_commit
352            }
353        };
354
355        let flat: Vec<FlatCommit> = my_commit
356            .changes
357            .iter()
358            .map(|change| FlatCommit {
359                id: my_commit.id.clone(),
360                parents: my_commit.parents.clone(),
361                r#type: my_commit.r#type.clone(),
362                repo_url: my_commit.repo_url.clone(),
363                timestamp: my_commit.timestamp.clone(),
364                author_name: my_commit.author_name.clone(),
365                author_email: my_commit.author_email.clone(),
366                message: my_commit.message.clone(),
367                path: change.path.clone(),
368                lines_added: change.lines_added.clone(),
369                lines_removed: change.lines_removed.clone(),
370                lines_modified: change.lines_modified.clone(),
371                hunks_added: change.hunks_added.clone(),
372                hunks_removed: change.hunks_removed.clone(),
373                hunks_modified: change.hunks_modified.clone(),
374            })
375            .collect();
376
377        flat.iter().for_each(|f| {
378            let my_flat_json =
379                serde_json::to_string(&f).map_err(|_e| git2::Error::from_str("Serde failed!"));
380
381            if let Ok(f) = my_flat_json {
382                println!("{}", f);
383            }
384        });
385    }
386
387    Ok(())
388}
389
390#[derive(Parser, Debug)]
391pub struct Args {
392    #[arg(short = 'U', help = "Username to provide for PLAINTEXT auth")]
393    plaintext_username: Option<String>,
394
395    #[arg(short = 'P', help = "Password to provide for PLAINTEXT auth")]
396    plaintext_password: Option<String>,
397
398    #[arg(help = "The URL of the repository to clone")]
399    repo_url: String,
400}