use std::collections::BTreeSet;
use std::time::Duration;
use futures::StreamExt;
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use tracing::{debug, instrument};
use crate::error::{Result, SeerError};
/// Time budget used both as the shared HTTP client's request timeout and as
/// the deadline for streaming the CT log response body in `enumerate`.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Outcome of a subdomain enumeration for a single domain.
///
/// The field notes below describe how `SubdomainEnumerator::enumerate`
/// populates the struct; values built elsewhere are not constrained by them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubdomainResult {
/// The domain that was queried, in the normalized form used for the lookup.
pub domain: String,
/// Discovered names under `domain`: lowercased, de-duplicated, sorted, and
/// never including `domain` itself or wildcard entries.
pub subdomains: Vec<String>,
/// Human-readable label of the data source,
/// e.g. `"crt.sh (Certificate Transparency)"`.
pub source: String,
/// Number of entries in `subdomains` (kept as a separate serialized field).
pub count: usize,
}
/// Stateless handle for discovering subdomains of a domain.
///
/// The type carries no data, so `Default` is derived rather than hand-written
/// (it yields the same value as `SubdomainEnumerator::new`), and `Debug` is
/// derived so the public type can be logged or embedded in `Debug` structs.
#[derive(Debug, Default)]
pub struct SubdomainEnumerator;
/// Process-wide HTTP client, constructed lazily on first access.
///
/// On construction failure the builder's error text is kept (instead of being
/// discarded) so that [`client`] can report *why* initialization failed.
static HTTP_CLIENT: Lazy<std::result::Result<reqwest::Client, String>> = Lazy::new(|| {
    reqwest::Client::builder()
        .timeout(DEFAULT_TIMEOUT)
        .user_agent("seer-domain-tool")
        // Redirects are never followed; a redirect response is returned as-is
        // and is then rejected by callers that require a success status.
        .redirect(reqwest::redirect::Policy::none())
        .build()
        .map_err(|e| e.to_string())
});

/// Returns the shared HTTP client.
///
/// # Errors
///
/// Returns `SeerError::HttpError` carrying the underlying builder error if the
/// client could not be constructed. The failure is cached: every later call
/// reports the same error without retrying the build.
fn client() -> Result<&'static reqwest::Client> {
    HTTP_CLIENT.as_ref().map_err(|cause| {
        SeerError::HttpError(format!("failed to initialize HTTP client: {}", cause))
    })
}
impl SubdomainEnumerator {
    /// Creates a new enumerator. The type is stateless, so this is free.
    pub fn new() -> Self {
        Self
    }

    /// Enumerates subdomains of `domain` by querying crt.sh's JSON endpoint.
    ///
    /// The input is normalized via `crate::validation::normalize_domain`, the
    /// CT log entries for `%.<domain>` are downloaded (size- and time-bounded),
    /// and every certificate name that lies strictly below the domain is
    /// collected. The returned list is lowercased, de-duplicated, sorted, and
    /// contains neither the domain itself nor wildcard names.
    ///
    /// # Errors
    ///
    /// * Whatever `normalize_domain` returns for an invalid `domain`.
    /// * `SeerError::HttpError` if the HTTP client is unavailable, the request
    ///   fails, the status is not a success, the body exceeds the size cap,
    ///   reading the body fails, or the body is not the expected JSON.
    /// * `SeerError::Timeout` if streaming the body exceeds `DEFAULT_TIMEOUT`.
    #[instrument(skip(self), fields(domain = %domain))]
    pub async fn enumerate(&self, domain: &str) -> Result<SubdomainResult> {
        let domain = crate::validation::normalize_domain(domain)?;
        debug!(domain = %domain, "Enumerating subdomains via CT logs");

        // `%25` is a URL-encoded `%`, so the query sent is `%.<domain>`.
        let url = format!("https://crt.sh/?q=%25.{}&output=json", domain);
        let body = Self::fetch_ct_log_body(&url).await?;

        let entries: Vec<CtLogEntry> = serde_json::from_slice(&body)
            .map_err(|e| SeerError::HttpError(format!("Failed to parse CT log response: {}", e)))?;

        let subdomains = Self::extract_subdomains(&entries, &domain);
        let count = subdomains.len();
        Ok(SubdomainResult {
            domain,
            subdomains,
            source: "crt.sh (Certificate Transparency)".to_string(),
            count,
        })
    }

    /// Downloads the body at `url`, enforcing a 10 MiB cap and a read deadline.
    async fn fetch_ct_log_body(url: &str) -> Result<Vec<u8>> {
        const MAX_CT_RESPONSE_SIZE: usize = 10 * 1024 * 1024;

        let response = client()?
            .get(url)
            .send()
            .await
            .map_err(|e| SeerError::HttpError(format!("CT log query failed: {}", e)))?;

        let status = response.status();
        if !status.is_success() {
            return Err(SeerError::HttpError(format!(
                "CT log returned status {}",
                status
            )));
        }

        // Fast rejection based on the declared length. The comparison is done
        // in `u64`: narrowing the header value to `usize` would truncate on
        // 32-bit targets and could let an oversized declaration through.
        if let Some(content_length) = response.content_length() {
            if content_length > MAX_CT_RESPONSE_SIZE as u64 {
                return Err(SeerError::HttpError(format!(
                    "CT log response too large: {} bytes (limit: {} bytes)",
                    content_length, MAX_CT_RESPONSE_SIZE
                )));
            }
        }

        // The declared length may be absent or wrong, so the cap is enforced
        // again while the body is actually being received.
        let mut body: Vec<u8> = Vec::new();
        let mut stream = response.bytes_stream();
        let read_body = async {
            while let Some(chunk) = stream.next().await {
                let chunk = chunk.map_err(|e| {
                    SeerError::HttpError(format!("Failed to read CT log response: {}", e))
                })?;
                if body.len() + chunk.len() > MAX_CT_RESPONSE_SIZE {
                    return Err(SeerError::HttpError(format!(
                        "CT log response too large (exceeds {} bytes)",
                        MAX_CT_RESPONSE_SIZE
                    )));
                }
                body.extend_from_slice(&chunk);
            }
            Ok::<(), SeerError>(())
        };

        // Outer `?` handles the deadline, inner `?` handles a read/size error.
        tokio::time::timeout(DEFAULT_TIMEOUT, read_body)
            .await
            .map_err(|_| {
                SeerError::Timeout(format!(
                    "CT log body read timed out after {:?}",
                    DEFAULT_TIMEOUT
                ))
            })??;

        Ok(body)
    }

    /// Collects the sorted, unique, non-wildcard names strictly below `domain`.
    fn extract_subdomains(entries: &[CtLogEntry], domain: &str) -> Vec<String> {
        // Requiring the `.domain` suffix both restricts results to this zone
        // and excludes the bare domain itself.
        let suffix = format!(".{}", domain);

        let unique: BTreeSet<String> = entries
            .iter()
            .flat_map(|entry| {
                // Both fields may hold several newline-separated names.
                entry
                    .common_name
                    .split('\n')
                    .chain(entry.name_value.iter().flat_map(|names| names.split('\n')))
            })
            // DNS names compare ASCII-case-insensitively. Unicode-aware
            // lowercasing can fold some non-ASCII characters into ASCII
            // letters (e.g. U+212A KELVIN SIGN becomes `k`), which would let
            // such names slip past the ASCII-only check below.
            .map(|raw| raw.trim().to_ascii_lowercase())
            .filter(|name| {
                name.ends_with(&suffix)
                    && !name.starts_with('*')
                    && Self::is_plausible_hostname(name)
            })
            .collect();

        unique.into_iter().collect()
    }

    /// Cheap syntactic sanity check applied to names taken from CT log data.
    fn is_plausible_hostname(name: &str) -> bool {
        !name.is_empty()
            && name.len() <= 253
            && !name.starts_with('.')
            && !name.starts_with('-')
            && !name.contains("..")
            && name
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-')
    }
}
#[derive(Debug, Deserialize)]
struct CtLogEntry {
#[serde(default)]
common_name: String,
#[serde(default)]
name_value: Option<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The serialized form must mention every subdomain and the source label.
    #[test]
    fn test_subdomain_result_serialization() {
        let names: Vec<String> = ["api.example.com", "mail.example.com"]
            .iter()
            .map(|name| name.to_string())
            .collect();
        let sample = SubdomainResult {
            domain: String::from("example.com"),
            subdomains: names,
            source: String::from("crt.sh (Certificate Transparency)"),
            count: 2,
        };

        let encoded = serde_json::to_string(&sample).unwrap();

        for expected in &["api.example.com", "mail.example.com", "crt.sh"] {
            assert!(encoded.contains(*expected));
        }
    }

    /// `Default` must be available and construct the enumerator without panicking.
    #[test]
    fn test_subdomain_enumerator_default() {
        let _enumerator = SubdomainEnumerator::default();
    }
}