pub mod aws_scanning;
pub mod git_scanning;
pub mod google_scanning;
use clap::ArgMatches;
use hex;
use log::{self, error, info};
use regex::bytes::{Matches, Regex, RegexBuilder};
use serde::Serialize;
use serde_json::{Map, Value};
use simple_error::SimpleError;
use simple_logger::init_with_level;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fs::File;
use std::hash::{Hash, Hasher};
use std::io::BufReader;
use std::iter::FromIterator;
use std::{fmt, fs, str};
/// Built-in regex rule set used when no custom JSON file or string is supplied.
/// Format: a single JSON object mapping rule name -> regex pattern (regex crate
/// syntax, matched against raw bytes). Invalid individual patterns are skipped
/// at build time, so editing this list cannot brick the scanner.
///
/// NOTE: the "New Relic Insights Key (vague)" rule previously read `{1,4}b[\w-]`
/// (literal 'b' instead of the `\b` word boundary used by every sibling rule);
/// that typo is fixed below.
const DEFAULT_REGEX_JSON: &str = r##"
{
"Slack Token": "(xox[p|b|o|a]-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32})",
"RSA private key": "-----BEGIN RSA PRIVATE KEY-----",
"SSH (DSA) private key": "-----BEGIN DSA PRIVATE KEY-----",
"SSH (EC) private key": "-----BEGIN EC PRIVATE KEY-----",
"PGP private key block": "-----BEGIN PGP PRIVATE KEY BLOCK-----",
"Amazon AWS Access Key ID": "AKIA[0-9A-Z]{16}",
"Amazon MWS Auth Token": "amzn\\.mws\\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
"AWS API Key": "AKIA[0-9A-Z]{16}",
"Facebook Access Token": "EAACEdEose0cBA[0-9A-Za-z]+",
"Facebook OAuth": "(?i)facebook[\\s[[:punct:]]]{1,4}[0-9a-f]{32}[\\s[[:punct:]]]?",
"GitHub": "(?i)github[\\s[[:punct:]]]{1,4}[0-9a-zA-Z]{35,40}",
"Generic API Key": "(?i)(api|access)[\\s[[:punct:]]]?key[\\s[[:punct:]]]{1,4}[0-9a-zA-Z\\-_]{16,64}[\\s[[:punct:]]]?",
"Generic Account API Key": "(?i)account[\\s[[:punct:]]]?api[\\s[[:punct:]]]{1,4}[0-9a-zA-Z\\-_]{16,64}[\\s[[:punct:]]]?",
"Generic Secret": "(?i)secret[\\s[[:punct:]]]{1,4}[0-9a-zA-Z-_]{16,64}[\\s[[:punct:]]]?",
"Google API Key": "AIza[0-9A-Za-z\\-_]{35}",
"Google Cloud Platform API Key": "AIza[0-9A-Za-z\\-_]{35}",
"Google Cloud Platform OAuth": "(?i)[0-9]+-[0-9A-Za-z_]{32}\\.apps\\.googleusercontent\\.com",
"Google Drive API Key": "AIza[0-9A-Za-z\\-_]{35}",
"Google Drive OAuth": "(?i)[0-9]+-[0-9A-Za-z_]{32}\\.apps\\.googleusercontent\\.com",
"Google (GCP) Service-account": "(?i)\"type\": \"service_account\"",
"Google Gmail API Key": "AIza[0-9A-Za-z\\-_]{35}",
"Google Gmail OAuth": "(?i)[0-9]+-[0-9A-Za-z_]{32}\\.apps\\.googleusercontent\\.com",
"Google OAuth Access Token": "ya29\\.[0-9A-Za-z\\-_]+",
"Google YouTube API Key": "AIza[0-9A-Za-z\\-_]{35}",
"Google YouTube OAuth": "(?i)[0-9]+-[0-9A-Za-z_]{32}\\.apps\\.googleusercontent\\.com",
"Heroku API Key": "[h|H][e|E][r|R][o|O][k|K][u|U][\\s[[:punct:]]]{1,4}[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}",
"MailChimp API Key": "[0-9a-f]{32}-us[0-9]{1,2}",
"Mailgun API Key": "(?i)key-[0-9a-zA-Z]{32}",
"Credentials in absolute URL": "(?i)((https?|ftp)://)(([a-z0-9$_\\.\\+!\\*'\\(\\),;\\?&=-]|%[0-9a-f]{2})+(:([a-z0-9$_\\.\\+!\\*'\\(\\),;\\?&=-]|%[0-9a-f]{2})+)?@)((([a-z0-9]\\.|[a-z0-9][a-z0-9-]*[a-z0-9]\\.)*[a-z][a-z0-9-]*[a-z0-9]|((\\d|[1-9]\\d|1\\d{2}|2[0-4][0-9]|25[0-5])\\.){3}(\\d|[1-9]\\d|1\\d{2}|2[0-4][0-9]|25[0-5]))(:\\d+)?)(((/+([a-z0-9$_\\.\\+!\\*'\\(\\),;:@&=-]|%[0-9a-f]{2})*)*(\\?([a-z0-9$_\\.\\+!\\*'\\(\\),;:@&=-]|%[0-9a-f]{2})*)?)?)?",
"PayPal Braintree Access Token": "(?i)access_token\\$production\\$[0-9a-z]{16}\\$[0-9a-f]{32}",
"Picatic API Key": "(?i)sk_live_[0-9a-z]{32}",
"Slack Webhook": "(?i)https://hooks.slack.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24}",
"Stripe API Key": "(?i)sk_live_[0-9a-zA-Z]{24}",
"Stripe Restricted API Key": "(?i)rk_live_[0-9a-zA-Z]{24}",
"Square Access Token": "(?i)sq0atp-[0-9A-Za-z\\-_]{22}",
"Square OAuth Secret": "(?i)sq0csp-[0-9A-Za-z\\-_]{43}",
"Twilio API Key": "SK[0-9a-fA-F]{32}",
"Twitter Access Token": "(?i)twitter[\\s[[:punct:]]]{1,4}[1-9][0-9]+-[0-9a-zA-Z]{40}",
"Twitter OAuth": "(?i)twitter[\\s[[:punct:]]]{1,4}['|\"]?[0-9a-zA-Z]{35,44}['|\"]?",
"New Relic Partner & REST API Key": "[^\\w./\\-\\+][A-Fa-f0-9]{47}[^\\w./\\-\\+]",
"New Relic Mobile Application Token": "[^\\w./\\-\\+][A-Fa-f0-9]{42}[^\\w./\\-\\+]",
"New Relic Synthetics Private Location": "(?i)minion_private_location_key",
"New Relic Insights Key (specific)": "(?i)insights[\\s[[:punct:]]]?(key|query|insert)[\\s[[:punct:]]]{1,4}\\b[\\w-]{32,40}\\b",
"New Relic Insights Key (vague)": "(?i)(query|insert)[\\s[[:punct:]]]?key[\\s[[:punct:]]]{1,4}\\b[\\w-]{32,40}\\b",
"New Relic License Key": "(?i)license[\\s[[:punct:]]]?key[\\s[[:punct:]]]{1,4}\\b[\\w-]{32,40}\\b",
"New Relic Internal API Key": "(?i)nr-internal-api-key",
"New Relic HTTP Auth Headers and API Key": "(?i)(x|newrelic|nr)-(partner|account|query|insert|api|license)-(id|key)[\\s[[:punct:]]]{1,4}\\b[\\w-]{32,47}\\b",
"New Relic API Key Service Key (new format)": "(?i)NRAK-[A-Z0-9]{27}",
"New Relic APM License Key (new format)": "(?i)[a-f0-9]{36}NRAL",
"New Relic APM License Key (new format, region-aware)": "(?i)[a-z]{2}[0-9]{2}xx[a-f0-9]{30}NRAL",
"New Relic REST API Key (new format)": "(?i)NRRA-[a-f0-9]{42}",
"New Relic Admin API Key (new format)": "(?i)NRAA-[a-f0-9]{27}",
"New Relic Insights Insert Key (new format)": "(?i)NRII-[A-Za-z0-9-_]{32}",
"New Relic Insights Query Key (new format)": "(?i)NRIQ-[A-Za-z0-9-_]{32}",
"New Relic Synthetics Private Location Key (new format)": "(?i)NRSP-[a-z]{2}[0-9]{2}[a-f0-9]{31}",
"Email address": "(?i)(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])",
"New Relic Account IDs in URL": "(newrelic\\.com/)?accounts/\\d{1,10}/",
"Account ID": "(?i)account[\\s[[:punct:]]]?id[\\s[[:punct:]]]{1,4}\\b[\\d]{1,10}\\b",
"Salary Information": "(?i)(salary|commission|compensation|pay)([\\s[[:punct:]]](amount|target))?[\\s[[:punct:]]]{1,4}\\d+"
}
"##;
// The 64 symbols of the standard base64 alphabet (RFC 4648, Table 1) as bytes:
// 'A'-'Z', 'a'-'z', '0'-'9', '+', '/'. Written as a byte-string literal instead
// of a decimal array — identical value, readable at a glance. Used to decide
// whether a word could plausibly be base64-encoded data.
const STANDARD_ENCODE: &[u8; 64] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/// A compiled secret-scanning engine: a set of named regex rules plus output
/// settings. Construct one via `SecretScannerBuilder` (or `Default`, which uses
/// the built-in rule set).
#[derive(Debug, Clone)]
pub struct SecretScanner {
    // Rule name -> compiled regex. BTreeMap keeps iteration order sorted by
    // rule name, so scan output is deterministic.
    pub regex_map: BTreeMap<String, Regex>,
    // When true, findings are serialized as pretty-printed JSON.
    pub pretty_print: bool,
    // File path to write findings to; None means print to stdout.
    pub output_path: Option<String>,
}
/// Fluent builder for `SecretScanner`. All settings are optional; `build()`
/// falls back to the built-in regex rules when neither a JSON path nor a JSON
/// string is supplied.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct SecretScannerBuilder {
    // Compile every rule case-insensitively when true.
    pub case_insensitive: bool,
    // Custom regex rules as an in-memory JSON string (lower priority than path).
    pub regex_json_str: Option<String>,
    // Path to a custom regex JSON file (takes priority over regex_json_str).
    pub regex_json_path: Option<String>,
    // Pretty-print JSON findings when true.
    pub pretty_print: bool,
    // Findings destination file; None means stdout.
    pub output_path: Option<String>,
}
impl SecretScannerBuilder {
pub fn new() -> Self {
Self {
case_insensitive: false,
regex_json_str: None,
regex_json_path: None,
pretty_print: false,
output_path: None,
}
}
pub fn conf_argm(mut self, arg_matches: &ArgMatches) -> Self {
self.case_insensitive = arg_matches.is_present("CASE");
self.output_path = match arg_matches.value_of("REGEX") {
Some(s) => Some(String::from(s)),
None => None,
};
self.pretty_print = arg_matches.is_present("PRETTYPRINT");
self.output_path = match arg_matches.value_of("OUTPUT") {
Some(s) => Some(String::from(s)),
None => None,
};
self
}
pub fn set_json_path(mut self, json_path: &str) -> Self {
self.regex_json_path = Some(String::from(json_path));
self
}
pub fn set_json_str(mut self, json_str: &str) -> Self {
self.regex_json_str = Some(String::from(json_str));
self
}
pub fn global_case_insensitive(mut self, case_insensitive: bool) -> Self {
self.case_insensitive = case_insensitive;
self
}
pub fn set_pretty_print(mut self, pretty_print: bool) -> Self {
self.pretty_print = pretty_print;
self
}
pub fn set_output_path(mut self, output_path: &str) -> Self {
self.output_path = Some(String::from(output_path));
self
}
pub fn build(&self) -> SecretScanner {
let json_obj: Result<Map<String, Value>, SimpleError> = match &self.regex_json_path {
Some(p) => Self::build_json_from_file(&p),
_ => match &self.regex_json_str {
Some(s) => Self::build_json_from_str(&s),
_ => Self::build_json_from_str(DEFAULT_REGEX_JSON),
},
};
let json_obj: Map<String, Value> = match json_obj {
Ok(x) => x,
Err(e) => {
error!(
"Error parsing Regex JSON object, falling back to default regex rules: {:?}",
e
);
Self::build_json_from_str(DEFAULT_REGEX_JSON).unwrap()
}
};
let regex_map = Self::build_regex_objects(json_obj, self.case_insensitive);
let output_path = match &self.output_path {
Some(s) => Some(s.clone()),
None => None,
};
SecretScanner {
regex_map,
pretty_print: self.pretty_print,
output_path,
}
}
fn build_json_from_file(filename: &str) -> Result<Map<String, Value>, SimpleError> {
info!("Attempting to read JSON regex file from {:?}", filename);
let regexes_filein = File::open(filename);
let f = match regexes_filein {
Ok(file) => file,
Err(e) => return Err(SimpleError::with("Failed to open the JSON regex file", e)),
};
let reader = BufReader::new(f);
info!("Attempting to parse JSON regex file from {:?}", filename);
match serde_json::from_reader(reader) {
Ok(m) => Ok(m),
Err(e) => Err(SimpleError::with("Failed to parse regex JSON", e)),
}
}
fn build_json_from_str(incoming_str: &str) -> Result<Map<String, Value>, SimpleError> {
info!("Attempting to parse JSON regex file from provided string...");
match serde_json::from_str(incoming_str) {
Ok(m) => Ok(m),
Err(e) => Err(SimpleError::with("Failed to parse regex JSON", e)),
}
}
fn build_regex_objects(
json_obj: Map<String, Value>,
case_insensitive: bool,
) -> BTreeMap<String, Regex> {
let regex_map: BTreeMap<String, String> = json_obj
.into_iter()
.map(|x| (x.0, String::from(x.1.as_str().unwrap())))
.collect();
regex_map
.into_iter()
.map(|x| {
let mut regex_builder = RegexBuilder::new(&x.1);
regex_builder.size_limit(10_000_000);
if case_insensitive {
regex_builder.case_insensitive(true);
};
(x.0, regex_builder.build())
})
.inspect(|(_, x)| {
if let Err(ref e) = x {
error!("Error parsing regex string: {:?}", e)
}
})
.filter(|(_, x)| x.is_ok())
.map(|(k, v)| (k, v.unwrap()))
.collect()
}
}
impl SecretScanner {
    /// Initialize the global logger based on the CLI verbosity count:
    /// 0 = warn, 1 = info, 2 = debug, 3+ = trace.
    /// Panics if a global logger has already been installed.
    pub fn set_logging(verbose_level: u64) {
        match verbose_level {
            0 => init_with_level(log::Level::Warn).unwrap(),
            1 => init_with_level(log::Level::Info).unwrap(),
            2 => init_with_level(log::Level::Debug).unwrap(),
            // `3 | _` was redundant; a plain catch-all covers 3 and above.
            _ => init_with_level(log::Level::Trace).unwrap(),
        }
    }

    /// Run every configured rule against `line`, returning rule-name -> lazy
    /// match iterator. No matching work happens until an iterator is consumed.
    pub fn matches<'a, 'b: 'a>(&'a self, line: &'b [u8]) -> BTreeMap<&'a String, Matches> {
        self.regex_map
            .iter()
            .map(|(name, regex)| (name, regex.find_iter(line)))
            .collect()
    }

    /// True when every byte of `string_in` is in the standard base64 alphabet
    /// (A-Z, a-z, 0-9, +, /). Note: '=' padding is NOT accepted, so padded
    /// base64 words will be rejected here.
    fn is_base64_string(string_in: &[u8]) -> bool {
        let alphabet: HashSet<&u8> = HashSet::from_iter(STANDARD_ENCODE.iter());
        string_in.iter().all(|b| alphabet.contains(b))
    }

    /// Entropy of `bytes` where each byte's probability is normalized by the
    /// alphabet size `keyspace` rather than the input length. This is NOT
    /// textbook Shannon entropy, but the thresholds in `entropy_findings`
    /// (4.5 for base64, 3.0 for hex) are tuned against this formula.
    fn calc_entropy(bytes: &[u8], keyspace: i32) -> f32 {
        let mut counts: HashMap<u8, i32> = HashMap::new();
        for &b in bytes {
            // Entry API: one lookup instead of the previous get-then-insert pair.
            *counts.entry(b).or_insert(0) += 1;
        }
        let mut entropy = 0.0;
        for &count in counts.values() {
            let p: f32 = (count as f32) / (keyspace as f32);
            entropy -= p * p.log(2.0);
        }
        entropy
    }

    /// Scan a line for high-entropy words and return them as strings:
    /// base64-looking words (>= 20 chars, entropy > 4.5 over a 64-symbol
    /// keyspace) followed by hex-looking words (>= 20 chars, entropy of the
    /// decoded bytes > 3.0 over a 255-symbol keyspace).
    pub fn entropy_findings(line: &[u8]) -> Vec<String> {
        // Split on space bytes, then strip common wrapping characters
        // (quotes, parens, CR/LF) from each word. Non-UTF-8 words become "".
        let words: Vec<&[u8]> = line
            .split(|b| *b == b' ')
            .map(|w| {
                std::str::from_utf8(w)
                    .unwrap_or("")
                    .trim_matches(|y: char| {
                        (y == '\'')
                            || (y == '"')
                            || (y == '\r')
                            || (y == '\n')
                            || (y == '(')
                            || (y == ')')
                    })
                    .as_bytes()
            })
            .collect();
        let mut b64_words: Vec<String> = words
            .iter()
            .filter(|word| word.len() >= 20 && Self::is_base64_string(word))
            .filter(|word| Self::calc_entropy(word, 64) > 4.5)
            // Safe: is_base64_string guarantees pure-ASCII content.
            .map(|word| str::from_utf8(word).unwrap().to_string())
            .collect();
        let mut hex_words: Vec<String> = words
            .iter()
            .filter(|word| (word.len() >= 20) && (word.iter().all(u8::is_ascii_hexdigit)))
            .filter_map(|&x| hex::decode(x).ok())
            .filter(|word| Self::calc_entropy(word, 255) > (3_f32))
            .map(hex::encode)
            .collect();
        let mut output: Vec<String> = Vec::new();
        output.append(&mut b64_words);
        output.append(&mut hex_words);
        output
    }

    /// Serialize `findings` as JSON and write them to `output_path`, or print
    /// to stdout when no path is configured.
    /// Panics if serialization or the file write fails.
    pub fn output_findings<T: Serialize + Eq + Hash>(&self, findings: &HashSet<T>) {
        // Build the serialized bytes directly instead of appending into a temp Vec.
        let json_text: Vec<u8> = if self.pretty_print {
            serde_json::ser::to_vec_pretty(findings).unwrap()
        } else {
            serde_json::ser::to_vec(findings).unwrap()
        };
        match &self.output_path {
            Some(op) => fs::write(op, &json_text).unwrap(),
            None => println!("{}", str::from_utf8(&json_text).unwrap()),
        };
    }
}
impl fmt::Display for SecretScanner {
    /// One-line human-readable summary of the scanner configuration:
    /// rule count, pretty-print flag, and output destination.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "SecretScanner: Regex_map len:{}, Pretty print:{}, Output path:{}",
            self.regex_map.len(),
            if self.pretty_print { "True" } else { "False" },
            self.output_path.as_deref().unwrap_or("None"),
        )
    }
}
impl PartialEq for SecretScanner {
    /// Two scanners are equal when they hold the same rule names mapping to the
    /// same regex source strings (compiled `Regex` itself is not comparable),
    /// with identical pretty-print and output-path settings.
    fn eq(&self, other: &Self) -> bool {
        // Equal length + every one of our keys present in `other` with the same
        // pattern string implies identical key sets (map keys are unique), which
        // replaces the previous separate keys().eq(...) pass.
        self.regex_map.len() == other.regex_map.len()
            && self.regex_map.iter().all(|(k, v)| {
                other
                    .regex_map
                    .get(k)
                    .map_or(false, |r| r.as_str() == v.as_str())
            })
            && self.pretty_print == other.pretty_print
            // Option<String> implements PartialEq; no hand-rolled nested match needed.
            && self.output_path == other.output_path
    }
}
impl Eq for SecretScanner {}
impl Hash for SecretScanner {
    /// Hash over rule names, regex source strings, and the output settings —
    /// exactly the fields `PartialEq` compares, so the Hash/Eq contract holds.
    fn hash<H: Hasher>(&self, state: &mut H) {
        for (k, v) in &self.regex_map {
            k.hash(state);
            v.as_str().hash(state);
        }
        // Hash the flag and the option directly instead of via sentinel strings.
        // The previous scheme hashed None as the string "outputpathno", which
        // collided with Some("outputpathno"); Option's own Hash impl
        // distinguishes the variants.
        self.pretty_print.hash(state);
        self.output_path.hash(state);
    }
}
impl Default for SecretScanner {
    /// A scanner built from an all-default builder: built-in regex rules,
    /// case-sensitive matching, compact JSON output to stdout.
    fn default() -> Self {
        SecretScannerBuilder::new().build()
    }
}
impl Default for SecretScannerBuilder {
fn default() -> Self {
Self::new()
}
}