use std::collections::BTreeSet;
use std::time::Duration;
use futures::StreamExt;
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use tracing::{debug, instrument};
use crate::error::{Result, SeerError};
/// Timeout used both as the HTTP client's request timeout and as the separate
/// deadline for streaming a response body.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Maximum number of fetch attempts per CT log query (the first try included).
const MAX_ATTEMPTS: u32 = 3;
/// Base delay between attempts; multiplied by `2^attempt` after each retryable failure.
const RETRY_BASE_BACKOFF: Duration = Duration::from_millis(500);
/// Largest CT log response body that will be accepted, in bytes (10 MiB).
const MAX_CT_RESPONSE_SIZE: usize = 10 * 1024 * 1024;
/// Result of a subdomain enumeration for a single domain.
///
/// Serializable and deserializable via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubdomainResult {
/// The domain that was queried (in normalized form when produced by
/// `SubdomainEnumerator::enumerate`).
pub domain: String,
/// Discovered subdomain names. `SubdomainEnumerator::enumerate` fills this
/// with unique names in sorted order.
pub subdomains: Vec<String>,
/// Human-readable label of the data source the names came from.
pub source: String,
/// Number of entries in `subdomains` (as set by `SubdomainEnumerator::enumerate`).
pub count: usize,
}
/// Stateless entry point for enumerating subdomains.
///
/// The type carries no fields, so the derived `Default` yields the same unit
/// value as `SubdomainEnumerator::new()`.
#[derive(Default)]
pub struct SubdomainEnumerator;
/// Shared HTTP client used for CT log queries, built lazily on first access.
///
/// The client is configured with `DEFAULT_TIMEOUT`, a fixed user agent, and
/// redirect following disabled. If construction fails, the failure message is
/// kept (instead of being discarded) so `client()` can report the cause.
static HTTP_CLIENT: Lazy<std::result::Result<reqwest::Client, String>> = Lazy::new(|| {
    reqwest::Client::builder()
        .timeout(DEFAULT_TIMEOUT)
        .user_agent("seer-domain-tool")
        .redirect(reqwest::redirect::Policy::none())
        .build()
        .map_err(|e| e.to_string())
});

/// Returns a reference to the shared HTTP client.
///
/// # Errors
///
/// Returns `SeerError::HttpError` if the client could not be constructed; the
/// message includes the underlying build error.
fn client() -> Result<&'static reqwest::Client> {
    HTTP_CLIENT.as_ref().map_err(|cause| {
        SeerError::HttpError(format!("failed to initialize HTTP client: {}", cause))
    })
}
impl SubdomainEnumerator {
    /// Creates a new enumerator.
    pub fn new() -> Self {
        Self
    }

    /// Enumerates subdomains of `domain` from crt.sh Certificate Transparency data.
    ///
    /// The domain is normalized, crt.sh is queried for `%.{domain}` (JSON
    /// output), and every name found in the `common_name` and `name_value`
    /// fields of the response is considered. A name is kept when it ends with
    /// `.{domain}` and consists only of ASCII letters, digits, `.` and `-`
    /// (see `is_plausible_hostname`). Wildcard entries and the queried domain
    /// itself are therefore never part of the result. Names are lowercased,
    /// deduplicated, and returned in sorted order.
    ///
    /// # Errors
    ///
    /// Propagates the error from domain normalization and from the HTTP fetch,
    /// and returns `SeerError::HttpError` when the response body is not the
    /// expected JSON.
    #[instrument(skip(self), fields(domain = %domain))]
    pub async fn enumerate(&self, domain: &str) -> Result<SubdomainResult> {
        let domain = crate::validation::normalize_domain(domain)?;
        debug!(domain = %domain, "Enumerating subdomains via CT logs");

        // `%25` is the percent-encoded form of `%`, so the query is `%.{domain}`.
        let url = format!("https://crt.sh/?q=%25.{}&output=json", domain);
        let body = fetch_with_retry(&url).await?;
        let entries: Vec<CtLogEntry> = serde_json::from_slice(&body)
            .map_err(|e| SeerError::HttpError(format!("Failed to parse CT log response: {}", e)))?;

        // Only names strictly below the queried domain qualify; requiring the
        // leading dot also excludes the domain itself.
        let suffix = format!(".{}", domain);
        let mut found = BTreeSet::new();
        for entry in &entries {
            collect_subdomains(&entry.common_name, &suffix, &mut found);
            if let Some(name_value) = entry.name_value.as_deref() {
                collect_subdomains(name_value, &suffix, &mut found);
            }
        }

        let subdomains: Vec<String> = found.into_iter().collect();
        let count = subdomains.len();
        Ok(SubdomainResult {
            domain,
            subdomains,
            source: "crt.sh (Certificate Transparency)".to_string(),
            count,
        })
    }
}

/// Adds every acceptable name from the newline-separated list `raw` to `out`.
///
/// Each line is trimmed and ASCII-lowercased. ASCII-only folding is deliberate:
/// Unicode lowercasing can map non-ASCII characters (e.g. U+212A KELVIN SIGN)
/// onto ASCII letters and so turn an invalid name into a valid-looking one.
fn collect_subdomains(raw: &str, suffix: &str, out: &mut BTreeSet<String>) {
    let names = raw
        .split('\n')
        .map(|line| line.trim().to_ascii_lowercase())
        .filter(|name| name.ends_with(suffix) && is_plausible_hostname(name));
    out.extend(names);
}

/// Returns `true` when `name` is non-empty, at most 253 bytes long, made up only
/// of ASCII alphanumerics, `.` and `-`, has no empty label (`..`), and does not
/// start with `.` or `-`.
///
/// Wildcard entries such as `*.example.com` are rejected here as well, because
/// `*` is not in the allowed character set.
fn is_plausible_hostname(name: &str) -> bool {
    !name.is_empty()
        && name.len() <= 253
        && name
            .bytes()
            .all(|b| b.is_ascii_alphanumeric() || b == b'.' || b == b'-')
        && !name.contains("..")
        && !name.starts_with('.')
        && !name.starts_with('-')
}
/// Fetches `url`, retrying transient failures up to `MAX_ATTEMPTS` times in total.
///
/// A terminal failure is returned immediately. After a retryable failure the
/// task sleeps for `RETRY_BASE_BACKOFF * 2^attempt` before the next attempt;
/// no sleep follows the final attempt. When every attempt fails, the error
/// from the last one is returned.
async fn fetch_with_retry(url: &str) -> Result<Vec<u8>> {
    let mut last_failure: Option<SeerError> = None;

    for attempt_index in 0..MAX_ATTEMPTS {
        let transient = match fetch_once(url).await {
            Ok(bytes) => return Ok(bytes),
            Err(FetchOutcome::Terminal(fatal)) => return Err(fatal),
            Err(FetchOutcome::Retryable(recoverable)) => recoverable,
        };

        let attempt_number = attempt_index + 1;
        debug!(
            attempt = attempt_number,
            max_attempts = MAX_ATTEMPTS,
            error = %transient,
            "Transient CT log failure, retrying"
        );
        last_failure = Some(transient);

        // Back off only when another attempt will actually follow.
        if attempt_number < MAX_ATTEMPTS {
            let delay = RETRY_BASE_BACKOFF * 2u32.pow(attempt_index);
            tokio::time::sleep(delay).await;
        }
    }

    match last_failure {
        Some(err) => Err(err),
        // Only reachable if the loop body never ran (MAX_ATTEMPTS == 0).
        None => Err(SeerError::HttpError(
            "CT log query failed with no recorded error".into(),
        )),
    }
}
/// Performs one GET request for `url` and returns the complete response body.
///
/// The body is streamed and capped at `MAX_CT_RESPONSE_SIZE` bytes; reading it
/// is additionally bounded by `DEFAULT_TIMEOUT`.
///
/// # Errors
///
/// Failures are classified for the retry loop:
/// - `FetchOutcome::Retryable`: the request could not be sent, the server
///   answered with a 5xx status, a body chunk failed to arrive, or reading the
///   body timed out.
/// - `FetchOutcome::Terminal`: the HTTP client is unavailable, the server
///   answered with any other non-success status, or the response exceeds the
///   size limit.
async fn fetch_once(url: &str) -> std::result::Result<Vec<u8>, FetchOutcome> {
    let response = client()
        .map_err(FetchOutcome::Terminal)?
        .get(url)
        .send()
        .await
        .map_err(|e| {
            FetchOutcome::Retryable(SeerError::HttpError(format!("CT log query failed: {}", e)))
        })?;

    let status = response.status();
    if !status.is_success() {
        let err = SeerError::HttpError(format!("CT log returned status {}", status));
        return Err(if status.is_server_error() {
            FetchOutcome::Retryable(err)
        } else {
            FetchOutcome::Terminal(err)
        });
    }

    // Reject early when the server declares an oversized body. The comparison is
    // done in u64: narrowing the declared length to usize would truncate on
    // targets where usize is smaller than 64 bits and could let a huge value
    // pass. Widening the limit to u64 is lossless.
    if let Some(content_length) = response.content_length() {
        if content_length > MAX_CT_RESPONSE_SIZE as u64 {
            return Err(FetchOutcome::Terminal(SeerError::HttpError(format!(
                "CT log response too large: {} bytes (limit: {} bytes)",
                content_length, MAX_CT_RESPONSE_SIZE
            ))));
        }
    }

    let mut body: Vec<u8> = Vec::new();
    let mut stream = response.bytes_stream();
    let streamed = tokio::time::timeout(DEFAULT_TIMEOUT, async {
        while let Some(chunk) = stream.next().await {
            let chunk = chunk.map_err(|e| {
                FetchOutcome::Retryable(SeerError::HttpError(format!(
                    "Failed to read CT log response: {}",
                    e
                )))
            })?;
            // The declared length may be absent or wrong, so the limit is also
            // enforced on the bytes actually received.
            if body.len() + chunk.len() > MAX_CT_RESPONSE_SIZE {
                return Err(FetchOutcome::Terminal(SeerError::HttpError(format!(
                    "CT log response too large (exceeds {} bytes)",
                    MAX_CT_RESPONSE_SIZE
                ))));
            }
            body.extend_from_slice(&chunk);
        }
        Ok(body)
    })
    .await;

    match streamed {
        Ok(Ok(body)) => Ok(body),
        Ok(Err(e)) => Err(e),
        // The outer `Err` is the elapsed timeout from `tokio::time::timeout`.
        Err(_) => Err(FetchOutcome::Retryable(SeerError::Timeout(format!(
            "CT log body read timed out after {:?}",
            DEFAULT_TIMEOUT
        )))),
    }
}
/// Classification of a failed fetch attempt; used as the error type of
/// `fetch_once` so that `fetch_with_retry` can decide whether to try again.
enum FetchOutcome {
/// Transient failure: `fetch_with_retry` moves on to the next attempt, if any remain.
Retryable(SeerError),
/// Permanent failure: `fetch_with_retry` returns the error immediately.
Terminal(SeerError),
}
/// One element of the JSON array returned by the CT log query. Only the two
/// name fields below are read; other keys in the response are not deserialized.
#[derive(Debug, Deserialize)]
struct CtLogEntry {
/// Value of the `common_name` key; empty when the key is absent. The
/// enumerator splits it on `\n` and treats each line as a candidate name.
// NOTE(review): `#[serde(default)]` only covers a missing key. A JSON `null`
// here would presumably fail deserialization of the whole response — confirm
// whether crt.sh can emit `null` for this field.
#[serde(default)]
common_name: String,
/// Value of the `name_value` key, if present; the enumerator splits it on `\n`
/// and treats each line as a candidate name.
#[serde(default)]
name_value: Option<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A serialized result must contain every subdomain and the source label.
    #[test]
    fn test_subdomain_result_serialization() {
        let names = ["api.example.com", "mail.example.com"];
        let subdomains: Vec<String> = names.iter().map(|name| name.to_string()).collect();
        let result = SubdomainResult {
            domain: String::from("example.com"),
            count: subdomains.len(),
            subdomains,
            source: String::from("crt.sh (Certificate Transparency)"),
        };

        let json = serde_json::to_string(&result).unwrap();

        for expected in &["api.example.com", "mail.example.com", "crt.sh"] {
            assert!(json.contains(*expected));
        }
    }

    /// `Default` must be available for the enumerator.
    #[test]
    fn test_subdomain_enumerator_default() {
        let _enumerator: SubdomainEnumerator = Default::default();
    }
}