extern crate log;
extern crate pretty_env_logger;
use keyhunter::{
report::{Reporter, ReporterBuilder},
ApiKeyCollector, ApiKeyError, ApiKeyMessage, Config, ScriptMessage, WebsiteWalkBuilder,
};
use log::{error, info};
use miette::{miette, Context as _, Error, IntoDiagnostic as _, Result};
use rand::random;
use std::{
env,
fs::{self, File},
io::{BufWriter, Write},
path::{Path, PathBuf},
sync::{mpsc, Arc, RwLock},
thread,
time::Duration,
};
/// Reporter shared between the main thread and the key-writer thread;
/// all writes go through the `RwLock`.
type SyncReporter<R> = Arc<RwLock<Reporter<R>>>;
/// Locate `tmp/yc-companies.csv` relative to the repository root.
///
/// The repo root is derived from this source file's own path (`file!()`,
/// two `parent()` hops up), so the example must be run from inside the repo.
///
/// # Errors
/// Returns a diagnostic (rather than panicking, as the old `assert!`s did)
/// when the root cannot be resolved, or when the CSV is missing or is not a
/// regular file — the caller attaches further context and renders it.
fn yc_path() -> Result<PathBuf> {
    let file_path = PathBuf::from(file!()).canonicalize().into_diagnostic()?;
    let root_dir = file_path
        .parent()
        .and_then(Path::parent)
        .ok_or_else(|| miette!("Could not resolve repo root directory"))?;
    let yc_companies = root_dir.join("tmp/yc-companies.csv");
    if !yc_companies.exists() {
        return Err(miette!(
            "YC Companies CSV not found. Did you run `make yc-companies.csv`? (path: {})",
            yc_companies.display()
        ));
    }
    if !yc_companies.is_file() {
        return Err(miette!(
            "YC Companies entry at {} is not a file.",
            yc_companies.display()
        ));
    }
    Ok(yc_companies)
}
fn yc_file() -> Result<String> {
let yc_sites_path =
yc_path().with_context(|| Error::msg("Could not find path to YC Companies CSV"))?;
fs::read_to_string(yc_sites_path)
.into_diagnostic()
.context("Failed to open YC Companies CSV file")
}
/// Create `tmp/api-keys-<random>.jsonl` and return a buffered writer to it.
///
/// A random `u32` suffix keeps concurrent or repeated runs from clobbering
/// each other's output files.
///
/// # Errors
/// Fails when the `tmp` directory or the output file cannot be created.
fn outfile() -> Result<BufWriter<File>> {
    let rand: u32 = random();
    fs::create_dir_all("tmp").into_diagnostic()?;
    let outfile_name = PathBuf::from(format!("tmp/api-keys-{rand}.jsonl"));
    info!(target:"keyhunter::main", "API keys will be stored in {}", outfile_name.display());
    // The original builder spelled out create + truncate + write and a
    // redundant `.append(false)` (the default) — that combination is exactly
    // what `File::create` does.
    let file = File::create(outfile_name).into_diagnostic()?;
    Ok(BufWriter::new(file))
}
/// Append one API-key finding to `output` as a single line of JSON (JSONL).
///
/// # Errors
/// Fails when serialization or the underlying write fails.
fn write_keys(output: &mut BufWriter<File>, api_key: ApiKeyError) -> Result<()> {
    let json = serde_json::to_string(&api_key).into_diagnostic()?;
    writeln!(output, "{}", json).into_diagnostic()
}
/// Entry point: walk every YC company site listed in the CSV, collect leaked
/// API keys over a channel, and record them to a JSONL file under `tmp/`.
fn main() -> Result<()> {
    // Default to info-level logging for keyhunter unless the user set RUST_LOG.
    if env::var("RUST_LOG").is_err() {
        env::set_var("RUST_LOG", "keyhunter=info");
    }
    pretty_env_logger::init();

    // Cap on pages visited per site; tunable via the MAX_WALKS env var.
    let max_walks: usize = env::var("MAX_WALKS")
        .into_diagnostic()
        .and_then(|w| w.parse().into_diagnostic())
        .unwrap_or(30);
    assert!(
        max_walks > 0,
        "MAX_WALKS cannot be zero otherwise no pages will be checked!"
    );

    let config = Arc::new(Config::gitleaks());
    let reporter: SyncReporter<_> = Arc::new(RwLock::new(
        ReporterBuilder::default().with_redacted(true).graphical(),
    ));

    // Propagate CSV-loading failures as diagnostics (was `.unwrap()`).
    let yc_sites_raw = yc_file()?;
    let yc_reader = csv::Reader::from_reader(yc_sites_raw.as_bytes());
    let mut key_writer = outfile()?;

    let (key_sender, key_receiver) = mpsc::channel::<ApiKeyMessage>();
    // Keep the writer thread's handle so we can join it before exiting.
    // Previously the handle was dropped, so `main` could return right after
    // sending `Stop` while keys were still queued/buffered, truncating the
    // tail of the output file.
    let writer_handle = thread::spawn(move || {
        while let Ok(message) = key_receiver.recv() {
            match message {
                ApiKeyMessage::Keys(api_keys) => {
                    reporter.write().unwrap().report_keys(&api_keys).unwrap();
                    for api_key in api_keys {
                        let url = api_key.url.clone();
                        write_keys(&mut key_writer, api_key)
                            .context(format!("Failed to write api keys for script {}", &url))
                            .unwrap();
                    }
                    // Flush eagerly so a crash mid-run loses as little as possible.
                    let _ = key_writer.flush();
                }
                ApiKeyMessage::RecoverableFailure(e) => {
                    println!("{:?}", e)
                }
                ApiKeyMessage::Stop => {
                    break;
                }
                _ => {}
            }
        }
        let _ = key_writer.flush();
    });

    let walk_builder = WebsiteWalkBuilder::new()
        .with_max_walks(max_walks)
        .with_random_ua(true)
        .with_cookie_jar(true)
        .with_shared_cache(true)
        .with_close_channel(false)
        .with_timeout(Duration::from_secs(15))
        .with_timeout_connect(Duration::from_secs(2));

    yc_reader
        .into_records()
        .flatten()
        .for_each(|record| {
            let name = &record[0];
            let url = record[1].to_string();
            // Skip this site (presumably known-bad; confirm with the author).
            if name.eq_ignore_ascii_case("million") {
                return;
            }
            info!(target: "keyhunter::main", "Scraping keys for site {name}...");
            let (tx_scripts, rx_scripts) = mpsc::channel::<ScriptMessage>();
            let walker = walk_builder.build(tx_scripts.clone());
            let collector = ApiKeyCollector::new(config.clone(), rx_scripts, key_sender.clone());
            let moved_url = url.clone();
            // One walker + one collector per site; both are joined before
            // moving on, so sites are processed sequentially.
            let walk_handle = thread::spawn(move || {
                let result = walker.walk(&moved_url);
                if result.is_err() {
                    error!(target: "keyhunter::main",
                        "failed to create walker: {}",
                        result.as_ref().unwrap_err()
                    );
                    // On failure the walker never closes the script channel
                    // itself, so signal the collector to stop here.
                    tx_scripts
                        .send(ScriptMessage::Done)
                        .into_diagnostic()
                        .context("Failed to send stop signal over script channel")
                        .unwrap();
                }
                result
            });
            let collector_handle = thread::spawn(move || collector.collect());
            collector_handle
                .join()
                .expect("ApiKeyCollector thread should have joined successfully");
            let walk_result = walk_handle
                .join()
                .expect("WebsiteWalker thread should have joined successfully");
            match walk_result {
                Ok(_) => {
                    info!(target: "keyhunter::main", "Done scraping for {name}");
                }
                Err(e) => {
                    error!(target: "keyhunter::main", "[run] Failed to scrape for '{url}': {e}");
                }
            }
        });

    // Tell the writer thread to drain and stop, then wait for it so the
    // output file is fully flushed before we exit (was `.unwrap()` + no join).
    key_sender
        .send(ApiKeyMessage::Stop)
        .into_diagnostic()
        .context("Failed to close API key channel")?;
    writer_handle
        .join()
        .expect("key-writer thread should have joined successfully");
    info!("Scraping completed");
    Ok(())
}