from datetime import datetime, timedelta
from github import Github, GithubException
from multiprocessing import Pool
import gzip
import json
import os
import requests
import subprocess
import tempfile
import uuid
import logging
import sys
# Allow "--log=LEVEL" as the sole command-line argument; default is WARNING.
loglevel = (
    sys.argv[1][len("--log="):]
    if len(sys.argv) == 2 and sys.argv[1].startswith("--log=")
    else "WARNING"
)

# Map the textual level onto logging's numeric constant and reject unknowns.
numeric_level = getattr(logging, loglevel.upper(), None)
if not isinstance(numeric_level, int):
    raise ValueError('Invalid log level: %s' % loglevel)
logging.basicConfig(level=numeric_level)
# Required configuration, all supplied through environment variables.
# Unpacking from a generator keeps the original left-to-right evaluation
# order, so a missing variable raises KeyError for the same name it did before.
_REQUIRED_ENV_VARS = (
    "GHE_REPO_TOKEN",
    "INSIGHTS_INSERT_KEY",
    "SSH_KEY_PATH",
    "INSIGHTS_ACCT_ID",
    "GHE_DOMAIN",
    "CHOCTAW_HOG_PATH",
)
(
    GHE_REPO_TOKEN,
    INSIGHTS_INSERT_KEY,
    SSH_KEY_PATH,
    INSIGHTS_ACCT_ID,
    GHE_DOMAIN,
    CHOCTAW_HOG_PATH,
) = (os.environ[name] for name in _REQUIRED_ENV_VARS)
# Authenticated client for the GitHub Enterprise v3 API.
g = Github(base_url=f"https://{GHE_DOMAIN}/api/v3", login_or_token=GHE_REPO_TOKEN, per_page=100)
repos = g.get_repos()

# Scan window: anything committed in the last 24 hours.
# NOTE(review): datetime.today() is naive local time; if the host is not
# UTC the `since` window sent to the API may be offset — confirm intent.
today = datetime.today()
twentyfourhoursago = today - timedelta(hours=24)

# Map ssh_url -> (oldest commit SHA in the window, commit-URL prefix) for
# every repository with recent activity and a usable SSH URL.
repo_dict = {}
for repo in repos:
    try:
        commits = list(repo.get_commits(since=twentyfourhoursago))
    except GithubException as e:
        # Empty repositories (and similar API errors) are skipped.
        logging.debug(e)
        continue
    if not commits:
        logging.debug("len(commits) == 0")
        continue
    if not repo.ssh_url:
        logging.debug("no SSH URL")
        continue
    # get_commits() yields newest-first, so commits[-1] is the oldest commit
    # inside the window — the scanner's --since_commit starting point.
    # (Fixed: the log f-string was missing its closing parenthesis.)
    logging.info(f"({repo.ssh_url}, {commits[-1].sha})")
    repo_dict[repo.ssh_url] = (commits[-1].sha, f"{repo.html_url}/commit/")

# Defensive filter of falsy values; entries are always non-empty tuples,
# so this is effectively a no-op kept for safety.
repo_dict = dict(filter(lambda x: x[1], repo_dict.items()))
logging.info(f"len(repo_dict) = {len(repo_dict)}")
tempdir = tempfile.gettempdir()


def scan_repo(x):
    """Run the choctaw_hog secret scanner against one repository.

    Args:
        x: one item of repo_dict, i.e. a pair of
           (ssh_url, (since_commit_sha, commit_url_prefix)).

    Returns:
        dict with the repo's ssh_url ("repo"), the path of the JSON results
        file the scanner writes ("results"), and the commit-URL prefix ("url").
    """
    # Unique output path so parallel workers never collide.
    filename = os.path.join(tempdir, str(uuid.uuid4()))
    result = subprocess.run(
        [
            CHOCTAW_HOG_PATH,
            "--outputfile",
            filename,
            "--since_commit",
            x[1][0],
            "--sshkeypath",
            SSH_KEY_PATH,
            x[0],
        ],
        capture_output=True,
    )
    # Fixed: the completed process was bound to an unused variable and
    # failures vanished silently. A failed scan leaves no output file (the
    # reader skips missing files), but the failure is now logged.
    if result.returncode != 0:
        logging.warning(
            "choctaw_hog failed for %s (rc=%d): %s",
            x[0],
            result.returncode,
            result.stderr,
        )
    return {"repo": x[0], "results": filename, "url": x[1][1]}
# Fan the repository scans out across a small pool of worker processes;
# pool.map already returns one result dict per repo_dict item, in order.
with Pool(3) as pool:
    output = list(pool.map(scan_repo, repo_dict.items()))
logging.info(f"len(output) = {len(output)}")
logging.debug(output)
# Flatten the per-repo scanner output files into one list of Insights
# events. Repos whose scan produced no results file are skipped.
output_array = []
for result_dict in output:
    try:
        f = open(result_dict["results"], "r")
    # Fixed: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt. Only a missing/unreadable file should be skipped.
    except OSError:
        continue
    with f:
        result_list = json.load(f)
    for finding in result_list:
        output_array.append(
            {
                "eventType": "ghe_secret_monitor",
                "commitHash": finding["commitHash"],
                "reason": finding["reason"],
                "path": finding["path"],
                "repo": result_dict["repo"],
                # Deep link to the offending commit in the GHE web UI.
                "url": result_dict["url"] + finding["commitHash"],
            }
        )
# Ship the findings to New Relic Insights as gzip-compressed JSON.
# BUG FIX: the URL was a plain string (no f-prefix), so the literal text
# "{INSIGHTS_ACCT_ID}" was sent instead of the account id.
url = f"https://insights-collector.newrelic.com/v1/accounts/{INSIGHTS_ACCT_ID}/events"
headers = {
    "Content-Type": "application/json",
    "X-Insert-Key": INSIGHTS_INSERT_KEY,
    "Content-Encoding": "gzip",
}
post = gzip.compress(json.dumps(output_array).encode("utf-8"))
logging.info(f"len(output_array) = {len(output_array)}")
logging.debug(output_array)
r = requests.post(url, data=post, headers=headers)
logging.info(f"insights status code: {r.status_code}")