use crate::SecretScanner;
use chrono::NaiveDateTime;
use encoding::all::ASCII;
use encoding::{DecoderTrap, Encoding};
use git2::{Commit, DiffFormat};
use git2::{DiffOptions, Repository, Time};
use log::{self, info};
use regex::bytes::Matches;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashSet};
use std::path::Path;
use std::{str, fmt};
use url::{ParseError, Url};
use std::hash::{Hash, Hasher};
/// One potential secret found in a single line of a commit's diff.
///
/// Findings are collected into a `HashSet` by `GitScanner::perform_scan`, so two findings
/// with identical field values collapse into one entry. When (de)serialized with serde,
/// `commit_hash` and `strings_found` use the keys `commitHash` and `stringsFound`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Default)]
pub struct GitFinding {
/// Message of the commit in which the line was found.
pub commit: String,
/// Id of that commit, rendered as a string.
#[serde(rename = "commitHash")]
pub commit_hash: String,
/// Commit time (seconds since the epoch) rendered through `NaiveDateTime`.
pub date: String,
/// The diff line that triggered the finding, decoded as ASCII.
pub diff: String,
/// The matched substrings (or entropy hits) extracted from `diff`.
#[serde(rename = "stringsFound")]
pub strings_found: Vec<String>,
/// Path of the new-side file of the diff delta the line belongs to.
pub path: String,
/// Key of the rule that matched, or `"Entropy"` for entropy-based findings.
pub reason: String,
}
/// How the repository location given to `GitScanner::init_git_repo` was classified.
pub enum GitScheme {
/// A `file://` URL; the repository is cloned from the local file system.
Localpath,
/// An `http://` or `https://` URL; cloned with username/password credentials.
Http,
/// An `ssh://` URL; cloned with SSH key or ssh-agent credentials.
Ssh,
/// The input did not parse as an absolute URL; it is first opened as a local
/// repository and, failing that, cloned over SSH.
Relativepath,
/// A `git://` URL; cloned through the same SSH credential path as `Ssh`.
Git,
}
/// Scans the history of a git repository for secrets.
///
/// A freshly constructed scanner has no repository attached; `init_git_repo` opens or
/// clones one and records the detected scheme, after which `perform_scan` can be called.
pub struct GitScanner {
/// Scanner whose `matches` method is run against every diff line.
pub secret_scanner: SecretScanner,
/// The opened or cloned repository; `None` until `init_git_repo` has run.
pub repo: Option<Repository>,
/// Classification of the repository location; `None` until `init_git_repo` has run.
pub scheme: Option<GitScheme>,
}
impl GitScanner {
pub fn new_from_scanner(secret_scanner: SecretScanner) -> Self {
Self {
secret_scanner,
repo: None,
scheme: None,
}
}
pub fn new() -> Self { Self { secret_scanner: SecretScanner::default(), repo: None, scheme: None } }
pub fn perform_scan(
&self,
glob: Option<&str>,
since_commit: Option<&str>,
until_commit: Option<&str>,
scan_entropy: bool,
) -> HashSet<GitFinding> {
let repo_option = self.repo.as_ref(); let repo = repo_option.unwrap();
let mut revwalk = repo.revwalk().unwrap();
revwalk.push_glob(glob.unwrap_or_else(|| "*")).unwrap();
let since_time_obj: Time = match since_commit {
Some(sc) => {
let revspec = match repo.revparse(sc) {
Ok(r) => r,
Err(e) => panic!("SINCECOMMIT value returned an error: {:?}", e),
};
let o = revspec.from().unwrap();
o.as_commit().unwrap().time()
}
None => Time::new(0, 0),
};
let until_time_obj: Time = match until_commit {
Some(sc) => {
let revspec = match repo.revparse(sc) {
Ok(r) => r,
Err(e) => panic!("UNTILCOMMIT value returned an error: {:?}", e),
};
let o = revspec.from().unwrap();
o.as_commit().unwrap().time()
}
None => Time::new(i64::max_value(), 0),
};
let revwalk = revwalk.map(|id| repo.find_commit(id.unwrap())).filter(|c| {
c.as_ref().unwrap().time() >= since_time_obj
&& c.as_ref().unwrap().time() <= until_time_obj
});
let mut findings: HashSet<GitFinding> = HashSet::new();
for commit in revwalk {
let commit: Commit = commit.unwrap();
info!("Scanning commit {}", commit.id());
if commit.parents().len() > 1 {
continue;
}
let a = if commit.parents().len() == 1 {
let parent = commit.parent(0).unwrap();
Some(parent.tree().unwrap())
} else {
None
};
let b = commit.tree().unwrap();
let mut diffopts = DiffOptions::new();
diffopts.force_binary(true);
let diff = repo
.diff_tree_to_tree(a.as_ref(), Some(&b), Some(&mut diffopts))
.unwrap();
diff.print(DiffFormat::Patch, |delta, _hunk, line| {
let new_line = line.content();
let matches_map: BTreeMap<&String, Matches> = self.secret_scanner.matches(new_line);
for (reason, match_iterator) in matches_map {
let mut secrets: Vec<String> = Vec::new();
for matchobj in match_iterator {
secrets.push(
ASCII
.decode(
&new_line[matchobj.start()..matchobj.end()],
DecoderTrap::Ignore,
)
.unwrap_or_else(|_| "<STRING DECODE ERROR>".parse().unwrap()),
);
}
if !secrets.is_empty() {
findings.insert(GitFinding {
commit_hash: commit.id().to_string(),
commit: commit.message().unwrap().to_string(),
diff: ASCII
.decode(&new_line, DecoderTrap::Ignore)
.unwrap_or_else(|_| "<STRING DECODE ERROR>".parse().unwrap()),
date: NaiveDateTime::from_timestamp(commit.time().seconds(), 0)
.to_string(),
strings_found: secrets.clone(),
path: delta
.new_file()
.path()
.unwrap()
.to_str()
.unwrap()
.to_string(),
reason: reason.clone(),
});
}
}
if scan_entropy {
let ef = SecretScanner::entropy_findings(new_line);
if !ef.is_empty() {
findings.insert(GitFinding {
commit: commit.message().unwrap().to_string(),
commit_hash: commit.id().to_string(),
diff: ASCII
.decode(&new_line, DecoderTrap::Ignore)
.unwrap_or_else(|_| "<STRING DECODE ERROR>".parse().unwrap()),
date: NaiveDateTime::from_timestamp(commit.time().seconds(), 0)
.to_string(),
strings_found: ef,
path: delta
.new_file()
.path()
.unwrap()
.to_str()
.unwrap()
.to_string(),
reason: "Entropy".to_string(),
});
}
}
true
})
.unwrap();
}
findings
}
/// Clones `https_git_url` into `dest_dir`, authenticating with a plaintext
/// username/password pair.
///
/// # Panics
///
/// Panics if the credentials object cannot be created or if the clone fails.
fn get_https_git_repo(
    https_git_url: &str,
    dest_dir: &Path,
    httpsuser: &str,
    httpspass: &str,
) -> Repository {
    let mut callbacks = git2::RemoteCallbacks::new();
    callbacks.credentials(|_, _, _| {
        info!("HTTPS auth detected, attempting to create credentials object...");
        Ok(git2::Cred::userpass_plaintext(httpsuser, httpspass)
            .expect("Cannot create credentials object."))
    });

    let mut fetch_options = git2::FetchOptions::new();
    fetch_options.remote_callbacks(callbacks);

    let mut repo_builder = git2::build::RepoBuilder::new();
    repo_builder.fetch_options(fetch_options);

    info!("HTTPS Git credentials successfully initialized, attempting to clone the repo...");
    repo_builder
        .clone(https_git_url, dest_dir)
        .unwrap_or_else(|e| {
            panic!(
                "<GITPATH> {:?} is a HTTPS GIT URL but couldn't be cloned. If your GitHub account \
                 uses 2FA make sure to use a personal access token as your password!:\n{:?}",
                https_git_url, e
            )
        })
}
/// Clones `ssh_git_url` into `dest_dir` using SSH credentials for `username`.
///
/// When `sshkeypath` is given, the private key at that path (optionally unlocked with
/// `sshkeyphrase`) is used; otherwise credentials are requested from the ssh agent.
///
/// # Panics
///
/// Panics if the credentials object cannot be created or if the clone fails.
fn get_ssh_git_repo(
    ssh_git_url: &str,
    dest_dir: &Path,
    sshkeypath: Option<&str>,
    sshkeyphrase: Option<&str>,
    username: &str,
) -> Repository {
    info!("username in get_ssh_git_repo: {:?}", username);
    let mut callbacks = git2::RemoteCallbacks::new();
    match sshkeypath {
        Some(key_path) => {
            // `move` copies the borrowed strings into the closure so it does not
            // borrow the arm-local `key_path` binding.
            callbacks.credentials(move |_, _, _| {
                info!("SSHKEYPATH detected, attempting to read credentials from supplied path...");
                let cred =
                    git2::Cred::ssh_key(username, None, Path::new(key_path), sshkeyphrase)
                        .expect("Cannot create credentials object.");
                Ok(cred)
            });
        }
        None => {
            callbacks.credentials(move |_, _, _| {
                info!("no SSHKEYPATH detected, attempting to read credentials from ssh_agent...");
                let cred = git2::Cred::ssh_key_from_agent(username)
                    .expect("Cannot create credentials object from ssh_agent");
                Ok(cred)
            });
        }
    }

    let mut fetch_options = git2::FetchOptions::new();
    fetch_options.remote_callbacks(callbacks);

    let mut repo_builder = git2::build::RepoBuilder::new();
    repo_builder.fetch_options(fetch_options);

    info!("SSH Git credentials successfully initialized, attempting to clone the repo...");
    repo_builder.clone(ssh_git_url, dest_dir).unwrap_or_else(|e| {
        panic!(
            "<GITPATH> {:?} is a SSH GIT URL but couldn't be cloned:\n{:?}",
            ssh_git_url, e
        )
    })
}
/// Classifies `path`, opens or clones the repository it designates, and returns the
/// scanner with `repo` and `scheme` populated.
///
/// * `path` - repository location: a `http(s)://`, `file://`, `ssh://` or `git://` URL,
///   or anything that does not parse as an absolute URL (treated as a relative path).
/// * `dest_dir` - directory that clones are written to.
/// * `sshkeypath` / `sshkeyphrase` - optional private key and passphrase for SSH clones;
///   without a key path the ssh agent is used.
/// * `httpsuser` / `httpspass` - credentials for `http(s)://` clones; both are required
///   for that scheme.
///
/// For `ssh://` and `git://` URLs the user name is taken from the URL and falls back to
/// `"git"` when the URL has none. A relative path is first opened as a local repository;
/// if that fails it is cloned over SSH, using the text before the first `@` as the user
/// name (or `"git"` when there is no `@`).
///
/// # Panics
///
/// Panics if `path` has an unsupported scheme or cannot be parsed, if HTTP(S)
/// credentials are missing, or if the repository cannot be opened or cloned.
pub fn init_git_repo(
    mut self,
    path: &str,
    dest_dir: &Path,
    sshkeypath: Option<&str>,
    sshkeyphrase: Option<&str>,
    httpsuser: Option<&str>,
    httpspass: Option<&str>,
) -> Self {
    let url = Url::parse(path);
    let scheme = match &url {
        Ok(parsed) => match parsed.scheme().to_ascii_lowercase().as_str() {
            "http" => {
                info!("Git scheme detected as http://, performing a clone...");
                GitScheme::Http
            }
            "https" => {
                info!("Git scheme detected as https:// , performing a clone...");
                GitScheme::Http
            }
            "file" => {
                info!("Git scheme detected as file://, performing a clone...");
                GitScheme::Localpath
            }
            "ssh" => {
                info!("Git scheme detected as ssh://, performing a clone...");
                GitScheme::Ssh
            }
            "git" => {
                info!("Git scheme detected as git://, performing a clone...");
                GitScheme::Git
            }
            s => panic!(
                "Error parsing GITPATH {:?}, please include the username with \"git@\"",
                s
            ),
        },
        Err(ParseError::RelativeUrlWithoutBase) => {
            info!(
                "Git scheme detected as a relative path, attempting to open on the local \
                 file system and then falling back to SSH..."
            );
            GitScheme::Relativepath
        }
        Err(e) => panic!("Unknown error parsing GITPATH: {:?}", e),
    };

    let repo = match scheme {
        GitScheme::Localpath => Repository::clone(path, dest_dir).unwrap_or_else(|e| {
            panic!(
                "<GITPATH> {:?} was detected as a local path but couldn't be opened: {:?}",
                path, e
            )
        }),
        GitScheme::Http => {
            let user = httpsuser.expect("HTTPS GIT URL detected but no username supplied");
            let pass = httpspass.expect("HTTPS GIT URL detected but no password supplied");
            Self::get_https_git_repo(path, dest_dir, user, pass)
        }
        GitScheme::Git | GitScheme::Ssh => {
            // These schemes are only chosen when parsing succeeded.
            let parsed = url.expect("scheme was derived from a successfully parsed URL");
            // An ssh:// URL without a user previously passed an empty user name to the
            // credential helper; default to "git" as the other SSH paths do.
            let username = match parsed.username() {
                "" => "git",
                name => name,
            };
            Self::get_ssh_git_repo(path, dest_dir, sshkeypath, sshkeyphrase, username)
        }
        GitScheme::Relativepath => Repository::open(path).unwrap_or_else(|_| {
            let username = path.find('@').map_or("git", |at| &path[..at]);
            Self::get_ssh_git_repo(path, dest_dir, sshkeypath, sshkeyphrase, username)
        }),
    };

    self.scheme = Some(scheme);
    self.repo = Some(repo);
    self
}
}
/// Debug rendering: the inner scanner, the repository path (or `"None"`), and the
/// scheme, each formatted with `{:?}`.
impl fmt::Debug for GitScanner {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let repo_str = self.repo.as_ref().map_or("None", |repo| {
            repo.path().to_str().unwrap_or("<path unwrap error>")
        });
        f.write_fmt(format_args!(
            "GitScanner: SecretScanner: {:?}, Repo: {:?}, GitScheme: {:?}",
            self.secret_scanner, repo_str, self.scheme
        ))
    }
}
/// User-facing rendering: the inner scanner, the repository path (or `"None"`), and the
/// scheme (or `"None"`), each formatted with `{}`.
impl fmt::Display for GitScanner {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let repo_str = self.repo.as_ref().map_or("None", |repo| {
            repo.path().to_str().unwrap_or("<path unwrap error>")
        });
        let scheme_string: String = self
            .scheme
            .as_ref()
            .map_or_else(|| String::from("None"), |scheme| scheme.to_string());
        f.write_fmt(format_args!(
            "GitScanner: SecretScanner: {}, Repo: {}, GitScheme: {}",
            self.secret_scanner, repo_str, &scheme_string
        ))
    }
}
/// Debug rendering: `GitScheme: ` followed by the lowercase variant name.
impl fmt::Debug for GitScheme {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match *self {
            GitScheme::Git => "git",
            GitScheme::Http => "http",
            GitScheme::Ssh => "ssh",
            GitScheme::Localpath => "localpath",
            GitScheme::Relativepath => "relativepath",
        };
        f.write_str("GitScheme: ")?;
        f.write_str(name)
    }
}
/// User-facing rendering: `GitScheme: ` followed by the lowercase variant name.
impl fmt::Display for GitScheme {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            GitScheme::Git => "git",
            GitScheme::Ssh => "ssh",
            GitScheme::Http => "http",
            GitScheme::Relativepath => "relativepath",
            GitScheme::Localpath => "localpath",
        };
        f.write_fmt(format_args!("GitScheme: {}", label))
    }
}
/// Two schemes are equal exactly when they are the same variant.
///
/// The comparison uses the enum discriminant, so it performs no allocation and does not
/// depend on how the variants are rendered as text.
impl PartialEq for GitScheme {
    fn eq(&self, other: &Self) -> bool {
        std::mem::discriminant(self) == std::mem::discriminant(other)
    }
}
impl Eq for GitScheme {}
/// Two scanners are equal when their inner secret scanners are equal, their schemes are
/// equal (both absent, or both the same variant), and their repositories are either both
/// absent or report the same path.
impl PartialEq for GitScanner {
    fn eq(&self, other: &Self) -> bool {
        self.secret_scanner == other.secret_scanner
            && self.scheme == other.scheme
            && self.repo.as_ref().map(|repo| repo.path())
                == other.repo.as_ref().map(|repo| repo.path())
    }
}
impl Eq for GitScanner {}
/// Hashes, in order: the inner secret scanner, the repository path (or the marker
/// `"norepo"`), and a lowercase tag for the scheme (or the marker `"noscheme"`).
impl Hash for GitScanner {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.secret_scanner.hash(state);

        if let Some(repo) = self.repo.as_ref() {
            repo.path().hash(state);
        } else {
            "norepo".hash(state);
        }

        let scheme_tag = match &self.scheme {
            None => "noscheme",
            Some(GitScheme::Localpath) => "localpath",
            Some(GitScheme::Http) => "http",
            Some(GitScheme::Ssh) => "ssh",
            Some(GitScheme::Relativepath) => "relativepath",
            Some(GitScheme::Git) => "git",
        };
        scheme_tag.hash(state);
    }
}
impl Default for GitScanner {
fn default() -> Self {
Self::new()
}
}