use std::collections::{BTreeSet, HashMap};
use std::net::IpAddr;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use rusqlite::{params, Connection};
use serde::{Deserialize, Serialize};
use crate::config::InfraIntelConfig;
use crate::discovery::{
cert::PeerCert,
dns::DnsFacts,
network_probe::{cloud_lookup, reverse_dns, tcp_probe_ports, PortProbe, TOP_PORTS},
subdomains::{certspotter_subdomains, crtsh_subdomains, hackertarget_subdomains},
};
use crate::impersonate::{ImpersonateClient, Profile};
use crate::{Error, Result};
/// Names one stage of the target-intel pipeline.
///
/// NOTE(review): within this file the enum is only consumed by
/// [`IntelStage::as_str`]; presumably callers elsewhere use it to label
/// per-stage output — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum IntelStage {
/// Rendered as `"subdomains"` by `as_str`.
Subdomains,
/// Rendered as `"dns"` by `as_str`.
Dns,
/// Rendered as `"whois"` by `as_str`.
Whois,
/// Rendered as `"cert"` by `as_str`.
Cert,
}
impl IntelStage {
pub fn as_str(self) -> &'static str {
match self {
Self::Subdomains => "subdomains",
Self::Dns => "dns",
Self::Whois => "whois",
Self::Cert => "cert",
}
}
}
/// Summary returned by [`TargetIntelOrchestrator::run`].
///
/// `Default` yields an empty report (empty strings/vectors, zero counters,
/// `None` WHOIS fields).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntelReport {
/// The root domain the run was started for.
pub target_root: String,
/// Every discovered domain other than `target_root`, in sorted order
/// (collected from a `BTreeSet`).
pub subdomains: Vec<String>,
/// Number of DNS records handed to the store. Rows are written with
/// `INSERT OR IGNORE`, so records that already existed are still counted.
pub dns_record_count: usize,
/// Textual form of every distinct A/AAAA address seen across all domains.
pub unique_ips: Vec<String>,
/// Number of TLS peer certificates obtained during the cert stage.
pub certs_captured: usize,
/// Registrar as reported by the WHOIS lookup, if any.
pub whois_registrar: Option<String>,
/// Creation date string exactly as returned by the WHOIS lookup.
pub whois_created: Option<String>,
/// Expiry date string exactly as returned by the WHOIS lookup.
pub whois_expires: Option<String>,
/// Non-fatal failures (subdomain sources, WHOIS, certificate grabs),
/// one formatted message per failure.
pub errors: Vec<String>,
/// Wall-clock duration of the run in milliseconds.
pub elapsed_ms: u64,
}
/// Drives the intel stages for one target and persists their findings.
pub struct TargetIntelOrchestrator {
/// SQLite connection that every stage writes its rows through.
conn: Connection,
/// Shared HTTP client used for the subdomain sources, the WHOIS lookup and
/// the HTTPS request that captures a peer certificate.
http: Arc<ImpersonateClient>,
/// Per-stage on/off switches (`subdomains`, `dns`, `whois`, `cert`,
/// `network_probe`) read by `run`.
cfg: InfraIntelConfig,
}
impl TargetIntelOrchestrator {
pub fn open(db_path: &Path, cfg: InfraIntelConfig) -> Result<Self> {
#[cfg(feature = "sqlite")]
{
let _ = crate::storage::sqlite::SqliteStorage::open(db_path)
.map_err(|e| Error::Storage(format!("open: {e}")))?;
}
let conn =
Connection::open(db_path).map_err(|e| Error::Storage(format!("rusqlite open: {e}")))?;
let http = Arc::new(
ImpersonateClient::new(Profile::Chrome131Stable)
.map_err(|e| Error::Config(format!("impersonate client: {e}")))?,
);
Ok(Self { conn, http, cfg })
}
/// Runs every enabled intel stage against `target_root` and persists the
/// findings, returning a summary report.
///
/// Stages run in this order, each gated by its `InfraIntelConfig` flag:
/// 1. **subdomains** — crt.sh, certspotter and hackertarget are queried one
///    after another and their results merged with `target_root`.
/// 2. **dns** — every known domain is resolved, its records stored, and the
///    root is flagged when it looks like wildcard DNS.
/// 3. **whois** — one lookup for `target_root`, upserted into `whois_records`.
/// 4. **cert** — a TLS peer certificate is grabbed for every domain that
///    resolved to at least one address.
/// 5. **network_probe** — reverse DNS, cloud attribution and a TCP probe of
///    `TOP_PORTS` for every distinct address.
///
/// # Errors
/// Any SQLite failure aborts the run with `Error::Storage`. Failures of the
/// remote lookups (subdomain sources, WHOIS, certificate grabs) are not
/// fatal; they are appended to `IntelReport::errors`.
pub async fn run(&mut self, target_root: &str) -> Result<IntelReport> {
    let start = std::time::Instant::now();
    let mut report = IntelReport {
        target_root: target_root.to_string(),
        ..Default::default()
    };

    // --- Stage 1: subdomain enumeration -------------------------------
    // The root itself is always part of the working set, even when the
    // stage is disabled, so later stages have at least one domain.
    let mut all_domains: BTreeSet<String> = BTreeSet::new();
    all_domains.insert(target_root.to_string());
    if self.cfg.subdomains {
        match crtsh_subdomains(&self.http, target_root).await {
            Ok(subs) => all_domains.extend(subs),
            Err(e) => report
                .errors
                .push(format!("[subdomains] crt.sh failed: {e}")),
        }
        match certspotter_subdomains(&self.http, target_root).await {
            Ok(subs) => all_domains.extend(subs),
            Err(e) => report
                .errors
                .push(format!("[subdomains] certspotter failed: {e}")),
        }
        match hackertarget_subdomains(&self.http, target_root).await {
            Ok(subs) => all_domains.extend(subs),
            Err(e) => report
                .errors
                .push(format!("[subdomains] hackertarget failed: {e}")),
        }
    }
    report.subdomains = all_domains
        .iter()
        .filter(|d| d.as_str() != target_root)
        .cloned()
        .collect();
    for d in &all_domains {
        let is_sub = if d == target_root { 0 } else { 1 };
        self.conn
            .execute(
                "INSERT INTO domains (domain, target_root, is_subdomain) \
                 VALUES (?1, ?2, ?3) \
                 ON CONFLICT(domain) DO UPDATE SET last_probed = strftime('%s','now')",
                params![d, target_root, is_sub],
            )
            .map_err(|e| Error::Storage(format!("domains insert: {e}")))?;
    }

    // --- Stage 2: DNS --------------------------------------------------
    // `seen_ips` stays empty when the stage is disabled, which in turn
    // makes the cert and network-probe stages no-ops.
    let mut seen_ips: HashMap<String, BTreeSet<IpAddr>> = HashMap::new();
    let mut total_records = 0usize;
    if self.cfg.dns {
        for d in &all_domains {
            let facts = crate::discovery::dns::lookup(d).await;
            total_records += self.persist_dns(d, &facts)?;
            let mut ips: BTreeSet<IpAddr> = BTreeSet::new();
            ips.extend(facts.a.iter().copied());
            ips.extend(facts.aaaa.iter().copied());
            seen_ips.insert(d.clone(), ips);
        }
        if self.is_wildcard_dns(target_root).await {
            self.conn
                .execute(
                    "UPDATE domains SET is_wildcard_dns = 1 WHERE domain = ?1",
                    params![target_root],
                )
                .map_err(|e| Error::Storage(format!("wildcard flag: {e}")))?;
        }
    }
    report.dns_record_count = total_records;

    // Persist every address and its domain link; `all_ips` is the
    // de-duplicated union reused by the report and the probe stage.
    let mut all_ips: BTreeSet<IpAddr> = BTreeSet::new();
    for (domain, ips) in &seen_ips {
        for ip in ips {
            all_ips.insert(*ip);
            self.conn
                .execute(
                    "INSERT OR IGNORE INTO ip_addresses (ip) VALUES (?1)",
                    params![ip.to_string()],
                )
                .map_err(|e| Error::Storage(format!("ip insert: {e}")))?;
            self.conn
                .execute(
                    "INSERT OR IGNORE INTO domain_ips (domain, ip) VALUES (?1, ?2)",
                    params![domain, ip.to_string()],
                )
                .map_err(|e| Error::Storage(format!("domain_ips insert: {e}")))?;
        }
    }
    report.unique_ips = all_ips.iter().map(|ip| ip.to_string()).collect();

    // --- Stage 3: WHOIS ------------------------------------------------
    if self.cfg.whois {
        match crate::discovery::whois::lookup(&self.http, target_root).await {
            Ok(reg) => {
                report.whois_registrar = reg.registrar.clone();
                report.whois_created = reg.created.clone();
                report.whois_expires = reg.expires.clone();
                // Dates that are not RFC 3339 are stored as NULL.
                let created = parse_iso_seconds(reg.created.as_deref());
                let expires = parse_iso_seconds(reg.expires.as_deref());
                let updated = parse_iso_seconds(reg.last_changed.as_deref());
                let ns_json =
                    serde_json::to_string(&reg.name_servers).unwrap_or_else(|_| "[]".into());
                let status_json =
                    serde_json::to_string(&reg.status).unwrap_or_else(|_| "[]".into());
                // NOTE(review): on conflict neither `created_at` nor
                // `raw_json` is refreshed, so `raw_json` can lag behind the
                // other columns — confirm this is intended.
                self.conn
                    .execute(
                        "INSERT INTO whois_records \
                         (domain, registrar, registrant_org, created_at, expires_at, \
                         updated_at, nameservers_json, status_json, abuse_email, raw_json) \
                         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10) \
                         ON CONFLICT(domain) DO UPDATE SET \
                         registrar=excluded.registrar, \
                         registrant_org=excluded.registrant_org, \
                         expires_at=excluded.expires_at, \
                         updated_at=excluded.updated_at, \
                         nameservers_json=excluded.nameservers_json, \
                         status_json=excluded.status_json, \
                         abuse_email=excluded.abuse_email, \
                         observed_at=strftime('%s','now')",
                        params![
                            target_root,
                            reg.registrar,
                            reg.registrant_org,
                            created,
                            expires,
                            updated,
                            ns_json,
                            status_json,
                            reg.abuse_emails.first().cloned(),
                            serde_json::to_string(&reg).ok()
                        ],
                    )
                    .map_err(|e| Error::Storage(format!("whois insert: {e}")))?;
            }
            Err(e) => report.errors.push(format!("[whois] {e}")),
        }
    }

    // --- Stage 4: TLS certificates -------------------------------------
    if self.cfg.cert {
        for d in &all_domains {
            // Skip names that did not resolve: there is nothing to connect to.
            let has_ip = seen_ips.get(d).map_or(false, |ips| !ips.is_empty());
            if !has_ip {
                continue;
            }
            match self.grab_cert(d).await {
                Ok(Some(cert)) => {
                    self.persist_cert(d, &cert)?;
                    report.certs_captured += 1;
                }
                Ok(None) => {}
                Err(e) => report.errors.push(format!("[cert {d}] {e}")),
            }
        }
    }

    // --- Stage 5: network probe ----------------------------------------
    if self.cfg.network_probe {
        // `all_ips` already is the sorted, de-duplicated union of `seen_ips`.
        for ip in &all_ips {
            let ptr = reverse_dns(*ip).await;
            let cloud = cloud_lookup(*ip);
            let cloud_provider = cloud.as_ref().map(|c| c.provider);
            // The provider's service name is what lands in the `cdn` column.
            let cloud_service = cloud.and_then(|c| c.service);
            self.conn
                .execute(
                    "UPDATE ip_addresses SET reverse_ptr = ?2, \
                     cloud_provider = ?3, cdn = ?4, \
                     last_updated = strftime('%s','now') \
                     WHERE ip = ?1",
                    params![ip.to_string(), ptr, cloud_provider, cloud_service],
                )
                .map_err(|e| Error::Storage(format!("ip_addresses update: {e}")))?;
            let probes = tcp_probe_ports(*ip, TOP_PORTS, Duration::from_millis(800)).await;
            self.persist_port_probes(*ip, &probes)?;
        }
    }

    report.elapsed_ms = start.elapsed().as_millis() as u64;
    Ok(report)
}
/// Stores the probe results for `ip` in `port_probes`.
///
/// Probes whose state is `PortState::Filtered` are not recorded. Rows are
/// written with `INSERT OR IGNORE`, so an existing row is left untouched.
///
/// # Errors
/// `Error::Storage` on the first insert that fails.
fn persist_port_probes(&mut self, ip: IpAddr, probes: &[PortProbe]) -> Result<()> {
    // The address text is the same for every row; render it once.
    let ip_text = ip.to_string();
    let recordable = probes.iter().filter(|probe| {
        !matches!(
            probe.state,
            crate::discovery::network_probe::PortState::Filtered
        )
    });
    for probe in recordable {
        self.conn
            .execute(
                "INSERT OR IGNORE INTO port_probes \
                 (ip, port, state, banner, service) \
                 VALUES (?1, ?2, ?3, ?4, ?5)",
                params![
                    ip_text,
                    probe.port as i64,
                    probe.state.as_str(),
                    probe.banner,
                    probe.service
                ],
            )
            .map_err(|e| Error::Storage(format!("port_probes insert: {e}")))?;
    }
    Ok(())
}
/// Writes every record in `f` for `domain` into `dns_records` and returns
/// how many records were submitted.
///
/// Record types are written in the order A, AAAA, CNAME, MX, TXT, NS, CAA.
/// Because the statement is `INSERT OR IGNORE`, the returned count includes
/// records that were already present.
///
/// # Errors
/// `Error::Storage` on the first insert that fails.
fn persist_dns(&mut self, domain: &str, f: &DnsFacts) -> Result<usize> {
    let mut stored = 0usize;
    // One closure does both the insert and the bookkeeping so each record
    // type below is a single line.
    let mut put = |rtype: &str, rdata: &str| -> Result<()> {
        self.conn
            .execute(
                "INSERT OR IGNORE INTO dns_records (domain, record_type, rdata) \
                 VALUES (?1, ?2, ?3)",
                params![domain, rtype, rdata],
            )
            .map_err(|e| Error::Storage(format!("dns_records insert: {e}")))?;
        stored += 1;
        Ok(())
    };

    f.a.iter().try_for_each(|ip| put("A", &ip.to_string()))?;
    f.aaaa.iter().try_for_each(|ip| put("AAAA", &ip.to_string()))?;
    f.cname.iter().try_for_each(|name| put("CNAME", name))?;
    f.mx.iter().try_for_each(|exchange| put("MX", exchange))?;
    f.txt.iter().try_for_each(|text| put("TXT", text))?;
    f.ns.iter().try_for_each(|server| put("NS", server))?;
    f.caa.iter().try_for_each(|entry| put("CAA", entry))?;

    Ok(stored)
}
/// Heuristically detects wildcard DNS under `target`.
///
/// Three fixed nonce labels are resolved beneath `target`. The result is
/// `true` only when all three resolve to the same, non-empty set of A/AAAA
/// addresses; the first empty or differing answer ends the check with
/// `false`.
async fn is_wildcard_dns(&self, target: &str) -> bool {
    let probe_hosts = [
        format!("crawlex-nonce-a8f3.{target}"),
        format!("crawlex-nonce-b92x.{target}"),
        format!("crawlex-nonce-c41z.{target}"),
    ];
    let mut baseline: Option<BTreeSet<IpAddr>> = None;
    for host in &probe_hosts {
        let facts = crate::discovery::dns::lookup(host).await;
        let resolved: BTreeSet<IpAddr> = facts
            .a
            .iter()
            .chain(facts.aaaa.iter())
            .copied()
            .collect();
        // A nonce name that does not resolve rules out a wildcard.
        if resolved.is_empty() {
            return false;
        }
        match &baseline {
            Some(expected) if expected != &resolved => return false,
            Some(_) => {}
            None => baseline = Some(resolved),
        }
    }
    true
}
async fn grab_cert(&self, domain: &str) -> Result<Option<PeerCert>> {
let url = url::Url::parse(&format!("https://{domain}/"))?;
let fut = self.http.get(&url);
match tokio::time::timeout(Duration::from_secs(10), fut).await {
Ok(Ok(resp)) => Ok(resp.peer_cert),
Ok(Err(e)) => Err(e),
Err(_) => Err(Error::Http(format!("cert grab timeout for {domain}"))),
}
}
/// Stores `cert` in `certs` and records that it was seen on `domain`:443.
///
/// A certificate without a SHA-256 fingerprint is silently skipped, since the
/// fingerprint is the conflict key. An already-known fingerprint leaves the
/// `certs` row untouched; the `cert_seen_on` link is inserted with
/// `INSERT OR IGNORE`.
///
/// Derived flags:
/// * `is_wildcard` — the subject CN contains `*`, or any SAN starts with `*.`.
/// * `is_self_signed` — subject CN and issuer CN are both present and equal
///   (a name comparison only, not a signature check).
///
/// # Errors
/// `Error::Storage` when either insert fails.
fn persist_cert(&mut self, domain: &str, cert: &PeerCert) -> Result<()> {
    let fingerprint = if let Some(hex) = cert.sha256.as_deref() {
        hex.to_string()
    } else {
        return Ok(());
    };

    // `contains('*')` also covers the leading "*." form.
    let cn_has_star = cert
        .subject_cn
        .as_deref()
        .map_or(false, |cn| cn.contains('*'));
    let is_wildcard = cn_has_star || cert.sans.iter().any(|san| san.starts_with("*."));

    let is_self_signed = match (cert.subject_cn.as_ref(), cert.issuer_cn.as_ref()) {
        (Some(subject), Some(issuer)) => subject == issuer,
        _ => false,
    };

    let sans_json = serde_json::to_string(&cert.sans).unwrap_or_else(|_| "[]".into());
    let not_before = parse_boring_asn1_time(cert.not_before.as_deref());
    let not_after = parse_boring_asn1_time(cert.not_after.as_deref());

    self.conn
        .execute(
            "INSERT INTO certs \
             (sha256_fingerprint, subject_cn, issuer_cn, not_before, not_after, \
             sans_json, is_wildcard, is_self_signed, source) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'tls_handshake') \
             ON CONFLICT(sha256_fingerprint) DO NOTHING",
            params![
                fingerprint,
                cert.subject_cn,
                cert.issuer_cn,
                not_before,
                not_after,
                sans_json,
                i64::from(is_wildcard),
                i64::from(is_self_signed),
            ],
        )
        .map_err(|e| Error::Storage(format!("certs insert: {e}")))?;

    self.conn
        .execute(
            "INSERT OR IGNORE INTO cert_seen_on (cert_sha256, domain, port) \
             VALUES (?1, ?2, 443)",
            params![fingerprint, domain],
        )
        .map_err(|e| Error::Storage(format!("cert_seen_on insert: {e}")))?;

    Ok(())
}
}
/// Converts an RFC 3339 timestamp into Unix seconds.
///
/// Returns `None` when `s` is `None` or is not valid RFC 3339.
fn parse_iso_seconds(s: Option<&str>) -> Option<i64> {
    use time::format_description::well_known::Rfc3339;
    use time::OffsetDateTime;

    let raw = s?;
    match OffsetDateTime::parse(raw, &Rfc3339) {
        Ok(stamp) => Some(stamp.unix_timestamp()),
        Err(_) => None,
    }
}
/// Converts a printed ASN.1 time into Unix seconds.
///
/// Accepts the `Mon DD HH:MM:SS YYYY GMT` layout (day may be space-padded,
/// the trailing `GMT` is optional), e.g. `"Jan  1 00:00:00 2024 GMT"`.
/// NOTE(review): this assumes `PeerCert::not_before` / `not_after` carry the
/// text produced by BoringSSL's `ASN1_TIME_print` — confirm against the code
/// that fills `PeerCert`.
///
/// Returns `None` for `None`, for any other layout, and for impossible
/// calendar values (such as Feb 30), so unrecognised input is stored as NULL
/// exactly as before this parser existed.
fn parse_boring_asn1_time(s: Option<&str>) -> Option<i64> {
    const MONTHS: [&str; 12] = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ];

    /// Parses a non-empty, digits-only field (rejects signs and whitespace).
    fn number(text: &str) -> Option<i64> {
        if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        text.parse().ok()
    }

    let mut fields = s?.split_whitespace();
    let month_name = fields.next()?;
    let day = number(fields.next()?)?;
    let clock = fields.next()?;
    let year = number(fields.next()?)?;
    // Only UTC is representable in this layout; anything else is rejected.
    match fields.next() {
        None | Some("GMT") => {}
        Some(_) => return None,
    }
    if fields.next().is_some() {
        return None;
    }

    let month = MONTHS.iter().position(|m| *m == month_name)? as i64 + 1;

    let mut hms = clock.split(':');
    let hour = number(hms.next()?)?;
    let minute = number(hms.next()?)?;
    let second = number(hms.next()?)?;
    if hms.next().is_some() {
        return None;
    }

    let is_leap = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        2 if is_leap => 29,
        2 => 28,
        4 | 6 | 9 | 11 => 30,
        _ => 31,
    };
    if year > 9999 || day < 1 || day > days_in_month || hour > 23 || minute > 59 || second > 59 {
        return None;
    }

    // Days since 1970-01-01 for a proleptic Gregorian date: shift the year
    // to start in March so the leap day falls at the end of the cycle.
    let shifted_year = if month <= 2 { year - 1 } else { year };
    let era = shifted_year.div_euclid(400);
    let year_of_era = shifted_year.rem_euclid(400);
    let month_from_march = (month + 9) % 12;
    let day_of_year = (153 * month_from_march + 2) / 5 + day - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    let days = era * 146_097 + day_of_era - 719_468;

    Some(days * 86_400 + hour * 3_600 + minute * 60 + second)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every stage renders to its lowercase label.
    #[test]
    fn intel_stage_stringify() {
        let table = [
            (IntelStage::Subdomains, "subdomains"),
            (IntelStage::Dns, "dns"),
            (IntelStage::Whois, "whois"),
            (IntelStage::Cert, "cert"),
        ];
        for (stage, label) in table.iter() {
            assert_eq!(stage.as_str(), *label);
        }
    }

    /// RFC 3339 input (with `Z` or a numeric offset) parses; anything else
    /// yields `None`.
    #[test]
    fn parse_iso_rfc3339_seconds() {
        let accepted = ["1998-11-25T12:41:54Z", "2024-01-15T10:00:00-03:00"];
        for stamp in accepted.iter() {
            assert!(parse_iso_seconds(Some(*stamp)).is_some());
        }
        assert!(parse_iso_seconds(None).is_none());
        assert!(parse_iso_seconds(Some("not a date")).is_none());
    }

    /// A default report carries no findings and no errors.
    #[test]
    fn report_default_is_empty() {
        let report = IntelReport::default();
        assert!(report.subdomains.is_empty());
        assert_eq!(report.dns_record_count, 0);
        assert!(report.unique_ips.is_empty());
        assert_eq!(report.certs_captured, 0);
        assert!(report.errors.is_empty());
    }

    /// Opening the orchestrator on a fresh path leaves a `domains` table
    /// behind in the database file.
    #[test]
    fn schema_opens_on_fresh_tmp_db() {
        let tmp = tempfile::tempdir().expect("tmpdir");
        let db_file = tmp.path().join("intel-smoke.db");
        let orch = TargetIntelOrchestrator::open(&db_file, InfraIntelConfig::default());
        assert!(orch.is_ok(), "open: {:?}", orch.err());

        let reopened = Connection::open(&db_file).expect("reopen");
        let domains_tables: i64 = reopened
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='domains'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(domains_tables, 1);
    }
}