use anyhow::{Context, Result};
use clap::Parser;
use std::path::PathBuf;
mod llm;
mod output;
mod pattern;
mod san;
mod verify;
mod wordlist;
use llm::{apply_label_templates, batch_expand_positions, derive_multi_pair_templates, expand_words_with_llm, fetch_available_models, preflight_check, prompt_model_selection, query_patterns};
use output::Output;
use pattern::{apply_patterns, Candidate};
use san::{extract_sans_from_hosts, merge_sans_into_targets, normalize_host};
use verify::{detect_wildcard_dns, verify_candidates};
use wordlist::{
build_seed_wordlist, extract_base_domain, extract_subdomain_words,
generate_brute_candidates, generate_structured_candidates, GenerationContext,
parse_hostname_structure, subdomain_depth, HostnameStructure,
};
// Command-line arguments for backDisco.
//
// The `///` comments on the fields below are picked up by clap's derive macro
// and shown as per-option help text in `--help`.
#[derive(Parser)]
#[command(name = "backDisco")]
#[command(about = "Discover backend origins from CDN frontends using LLM pattern analysis")]
struct Args {
    /// Known frontend hostname of a seed pair (repeatable; each -f needs a matching -b)
    #[arg(short = 'f', long, required = true, action = clap::ArgAction::Append)]
    front: Vec<String>,

    /// Known backend hostname of a seed pair (repeatable; each -b needs a matching -f)
    #[arg(short = 'b', long, required = true, action = clap::ArgAction::Append)]
    back: Vec<String>,

    /// File containing target frontends, one per line
    #[arg(short = 't', long)]
    targets: PathBuf,

    /// Optional output file path
    #[arg(short = 'o', long)]
    output: Option<PathBuf>,

    /// Verbosity level (2 or higher enables debug output)
    #[arg(short = 'v', long, default_value = "1")]
    verbose: u8,

    /// Restrict candidate verification to DNS-only mode
    #[arg(long)]
    dns_only: bool,

    /// Timeout in seconds for SAN extraction, wildcard DNS probing and verification
    #[arg(long, default_value = "5")]
    timeout: u64,

    /// Number of concurrent workers for SAN extraction and verification
    #[arg(long, default_value = "50")]
    concurrency: usize,

    /// Extract Subject Alternative Names from target certificates and add them as targets
    #[arg(long)]
    extract_sans: bool,

    /// Disable SAN extraction (takes precedence over --extract-sans)
    #[arg(long)]
    no_sans: bool,

    /// Also generate structured brute-force candidates via LLM position expansion
    #[arg(long)]
    brute: bool,

    /// Word-expansion amount requested from the LLM in the fallback brute-force path (0 disables it)
    #[arg(long, default_value = "5")]
    llm_expand: usize,

    /// Maximum subdomain depth (default: depth of the first backend seed, at least 1)
    #[arg(long)]
    max_depth: Option<usize>,

    /// Base URL of the LLM API (default: http://localhost:11434/v1)
    #[arg(long)]
    llmurl: Option<String>,

    /// LLM model name (if omitted, available models are fetched and you are prompted to pick one)
    #[arg(long)]
    model: Option<String>,

    /// Write all generated candidate hostnames to this file, one per line
    #[arg(long)]
    gen_wordlist_output: Option<PathBuf>,

    /// Batch size used for LLM position expansion
    #[arg(long, default_value = "20")]
    llm_batch_size: usize,
}
#[tokio::main]
async fn main() -> Result<()> {
rustls::crypto::ring::default_provider()
.install_default()
.expect("Failed to install rustls crypto provider");
let args = Args::parse();
let mut output = Output::new(args.verbose, args.output.clone())?;
let llm_base_url = args.llmurl.as_deref()
.unwrap_or("http://localhost:11434/v1");
let model = if let Some(model) = args.model {
model
} else {
output.info("[*] No model specified, fetching available models...");
let models = fetch_available_models(llm_base_url).await
.context("Failed to fetch available models")?;
if models.is_empty() {
anyhow::bail!("No models available. Please specify a model with --model");
}
let selected = prompt_model_selection(&models)
.context("Failed to select model")?;
output.success(&format!("[+] Selected model: {}", selected));
selected
};
output.info("[*] Pre-flight: Checking LLM endpoint connectivity...");
match preflight_check(llm_base_url, &model).await {
Ok(_) => {
output.success("[+] Pre-flight: LLM endpoint reachable");
}
Err(e) => {
output.error(&format!("[-] Pre-flight failed: {}", e));
anyhow::bail!("LLM endpoint unreachable: {}", e);
}
}
if args.front.len() != args.back.len() {
anyhow::bail!(
"Mismatched seed pairs: {} frontend(s) but {} backend(s). Each -f must have a corresponding -b.",
args.front.len(), args.back.len()
);
}
let seed_pairs: Vec<(String, String)> = args.front.iter()
.zip(args.back.iter())
.map(|(f, b)| (f.clone(), b.clone()))
.collect();
if seed_pairs.len() > 1 {
output.info(&format!("[*] Loaded {} seed pairs:", seed_pairs.len()));
for (i, (f, b)) in seed_pairs.iter().enumerate() {
output.info(&format!(" Pair {}: {} -> {}", i + 1, f, b));
}
}
let (backend_hostname, _) = normalize_host(&seed_pairs[0].1);
let backend_words = extract_subdomain_words(&backend_hostname);
let backend_depth = subdomain_depth(&backend_hostname);
let backend_base = extract_base_domain(&backend_hostname);
let max_depth = args.max_depth.unwrap_or(backend_depth.max(1));
output.info(&format!(
"[*] Backend analysis: {} -> base: {}, depth: {}, words: {:?}",
backend_hostname, backend_base, backend_depth, backend_words
));
output.info(&format!(
"[*] Reading targets from: {}",
args.targets.display()
));
let targets = std::fs::read_to_string(&args.targets)?
.lines()
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.collect::<Vec<_>>();
if targets.is_empty() {
anyhow::bail!("No valid targets found in file");
}
output.info(&format!("[*] Loaded {} target frontends", targets.len()));
let (targets, discovered_wildcards, san_results) = if args.extract_sans && !args.no_sans {
output.info("[*] Extracting Subject Alternative Names from target certificates...");
let timeout = std::time::Duration::from_secs(args.timeout);
let san_results = extract_sans_from_hosts(&targets, args.concurrency, timeout).await;
let mut total_sans = 0;
let mut successful_extractions = 0;
for result in &san_results {
if result.error.is_none() {
successful_extractions += 1;
total_sans += result.sans.len();
if args.verbose >= 2 {
output.debug(&format!(
" {} -> {} SAN(s): {}",
result.host,
result.sans.len(),
result.sans.join(", ")
));
}
} else if args.verbose >= 2 {
output.debug(&format!(
" {} -> error: {}",
result.host,
result.error.as_ref().unwrap()
));
}
}
output.success(&format!(
"[+] Extracted {} SAN(s) from {}/{} targets",
total_sans, successful_extractions, targets.len()
));
let merge_result = merge_sans_into_targets(&targets, &san_results, max_depth);
let new_count = merge_result.targets.len() - targets.len();
if !merge_result.wildcards.is_empty() {
output.info(&format!(
"[*] Found {} wildcard SAN(s): {}",
merge_result.wildcards.len(),
merge_result.wildcards.join(", ")
));
}
if new_count > 0 {
output.success(&format!(
"[+] Added {} new hosts from SANs (total: {})",
new_count,
merge_result.targets.len()
));
} else {
output.info("[*] No new unique hosts found in SANs");
}
(merge_result.targets, merge_result.wildcard_bases, san_results)
} else {
(targets, Vec::new(), Vec::new())
};
let mut all_patterns = Vec::new();
for (front, back) in &seed_pairs {
output.info(&format!(
"[*] Deriving pattern: {} -> {}",
front, back
));
match query_patterns(llm_base_url, &model, front, back).await {
Ok(pair_patterns) => {
for p in &pair_patterns {
if !all_patterns.iter().any(|existing: &pattern::Pattern|
existing.find == p.find && existing.replace == p.replace && existing.position == p.position
) {
all_patterns.push(p.clone());
}
}
}
Err(e) => {
output.warn(&format!("[!] Failed to derive patterns from {} -> {}: {}", front, back, e));
}
}
}
let patterns = all_patterns;
output.success(&format!("[+] Derived {} unique pattern(s) from {} seed pair(s)", patterns.len(), seed_pairs.len()));
if args.verbose >= 2 {
for pattern in &patterns {
output.debug(&format!(
" Pattern: \"{}\" -> \"{}\" ({})",
pattern.find, pattern.replace, pattern.position
));
}
}
let mut candidates = apply_patterns(&targets, &patterns);
output.info(&format!(
"[*] Generated {} candidates from pattern matching",
candidates.len()
));
if seed_pairs.len() >= 2 {
let templates = derive_multi_pair_templates(&seed_pairs);
if !templates.is_empty() {
let template_candidates = apply_label_templates(&targets, &templates);
if !template_candidates.is_empty() {
output.success(&format!(
"[+] Generated {} additional candidates from multi-pair template analysis",
template_candidates.len()
));
if args.verbose >= 2 {
for t in &templates {
if t.position == "label_template" {
output.debug(&format!(" Template: {}", t.find));
}
}
}
for hostname in template_candidates {
candidates.push(Candidate { hostname });
}
candidates.sort_by(|a, b| a.hostname.cmp(&b.hostname));
candidates.dedup_by(|a, b| a.hostname == b.hostname);
}
}
}
if args.brute {
output.info("[*] Building position-aware structured candidate generation...");
let backend_structure = parse_hostname_structure(&seed_pairs[0].1);
output.info(&format!(
"[*] Backend structure: {} segments, base: {}",
backend_structure.subdomain_segments.len(),
backend_structure.base_domain
));
if args.verbose >= 2 {
output.debug(&format!(" Segments: {:?}", backend_structure.subdomain_segments));
}
let mut target_structures: Vec<HostnameStructure> = Vec::new();
for target in &targets {
let (hostname, _) = normalize_host(target);
target_structures.push(parse_hostname_structure(&hostname));
}
let mut san_structures: Vec<HostnameStructure> = Vec::new();
let max_san_structures = 1000; if args.extract_sans && !args.no_sans {
let mut san_count = 0;
for result in &san_results {
for san in &result.sans {
if san_count >= max_san_structures {
if args.verbose >= 1 {
output.info(&format!(
"[*] Limiting SAN structure parsing to {} (to avoid excessive LLM calls)",
max_san_structures
));
}
break;
}
if san.starts_with('*') {
continue;
}
if let Ok(_) = san.parse::<std::net::IpAddr>() {
continue;
}
let (hostname, _) = normalize_host(san);
san_structures.push(parse_hostname_structure(&hostname));
san_count += 1;
}
if san_count >= max_san_structures {
break;
}
}
if san_count >= max_san_structures && args.verbose >= 1 {
output.info("[*] SAN structure limit reached, some SANs were not parsed");
}
}
let mut all_structures = vec![backend_structure.clone()];
let max_target_structures = 50;
for structure in target_structures.iter().take(max_target_structures) {
all_structures.push(structure.clone());
}
let max_san_structures_for_expansion = 50;
for structure in san_structures.iter().take(max_san_structures_for_expansion) {
all_structures.push(structure.clone());
}
if target_structures.len() > max_target_structures || san_structures.len() > max_san_structures_for_expansion {
output.info(&format!(
"[*] Limiting structures for LLM expansion: {} targets, {} SANs (to avoid excessive LLM calls)",
max_target_structures,
max_san_structures_for_expansion
));
}
use std::collections::HashSet as DedupSet;
let mut seen = DedupSet::new();
let mut unique_structures = Vec::new();
for structure in &all_structures {
let signature = format!("{:?}", structure.subdomain_segments);
if seen.insert(signature) {
unique_structures.push(structure.clone());
}
}
let total_operations: usize = unique_structures.iter()
.map(|s| s.subdomain_segments.len())
.sum();
output.info(&format!(
"[*] Expanding positions across {} unique hostname structure(s) ({} total input, ~{} LLM batch calls with batch size {})...",
unique_structures.len(),
all_structures.len(),
(total_operations + args.llm_batch_size - 1) / args.llm_batch_size.max(1),
args.llm_batch_size
));
if unique_structures.len() < all_structures.len() {
output.info(&format!(
"[*] Deduplicated {} duplicate structures",
all_structures.len() - unique_structures.len()
));
}
use std::collections::HashSet as WordSet;
let mut total_batches = 0usize;
let mut position_word_counts = Vec::new();
let max_positions = unique_structures.iter()
.map(|s| s.subdomain_segments.len())
.max()
.unwrap_or(0);
for pos_idx in 0..max_positions {
let mut unique_words = WordSet::new();
for structure in &unique_structures {
if pos_idx < structure.subdomain_segments.len() {
unique_words.insert(structure.subdomain_segments[pos_idx].clone());
}
}
let word_count = unique_words.len();
position_word_counts.push(word_count);
total_batches += (word_count + args.llm_batch_size - 1) / args.llm_batch_size.max(1);
}
let brute_pb = output.create_brute_progress(
total_batches as u64,
"Expanding positions with LLM (batched)"
);
if args.verbose >= 1 {
output.info(&format!(
"[*] Position word counts: {:?} (total batches: {})",
position_word_counts,
total_batches
));
}
use std::sync::Arc;
let progress_bar = Arc::new(brute_pb.clone());
let expansions_result = batch_expand_positions(
llm_base_url,
&model,
&unique_structures,
Some(progress_bar.clone()),
args.llm_batch_size,
).await;
match expansions_result {
Ok(expansions) => {
progress_bar.finish_with_message("Position expansion complete");
output.success(&format!(
"[+] Expanded {} unique hostname structure(s)",
expansions.len()
));
let backend_expansions = if !expansions.is_empty() {
&expansions[0]
} else {
let fallback: Vec<Vec<String>> = backend_structure.subdomain_segments
.iter()
.map(|s| vec![s.clone()])
.collect();
let _fallback = fallback;
return Err(anyhow::anyhow!("No expansions generated"));
};
let effective_max_depth = backend_structure.subdomain_segments.len().min(max_depth);
output.info(&format!(
"[*] Generating structured candidates at depths {}..1 (max: {})",
effective_max_depth,
effective_max_depth
));
let mut brute_candidates = Vec::new();
let gen_pb = output.create_brute_progress(
effective_max_depth as u64,
"Generating structured candidates"
);
for depth in (1..=effective_max_depth).rev() {
gen_pb.set_message(format!("Generating candidates at depth {}", depth));
let context = GenerationContext {
max_depth: depth,
position_expansions: backend_expansions[..depth.min(backend_expansions.len())].to_vec(),
base_domain: backend_base.clone(),
};
let depth_candidates = generate_structured_candidates(&context);
brute_candidates.extend(depth_candidates);
gen_pb.set_position((effective_max_depth - depth + 1) as u64);
}
gen_pb.finish_with_message("Candidate generation complete");
brute_candidates.sort();
brute_candidates.dedup();
let initial_count = brute_candidates.len();
brute_candidates.retain(|candidate| {
let candidate_base = extract_base_domain(candidate);
candidate_base == backend_base
});
let filtered_count = initial_count - brute_candidates.len();
if filtered_count > 0 && args.verbose >= 1 {
output.info(&format!(
"[*] Filtered out {} candidates that don't match backend domain {}",
filtered_count, backend_base
));
}
output.success(&format!(
"[+] Generated {} structured candidates (matching {})",
brute_candidates.len(),
backend_base
));
for hostname in brute_candidates {
candidates.push(Candidate { hostname });
}
}
Err(e) => {
progress_bar.finish_with_message("Position expansion failed");
output.error(&format!("[-] LLM expansion failed: {}", e));
output.info("[*] Falling back to traditional brute force...");
let mut seed_words = backend_words.clone();
if args.llm_expand > 0 && !seed_words.is_empty() {
if let Ok(expanded) = expand_words_with_llm(llm_base_url, &model, &seed_words, args.llm_expand).await {
seed_words.extend(expanded);
}
}
let wordlist = build_seed_wordlist(&seed_words, &[]);
let mut brute_candidates = generate_brute_candidates(&wordlist, &backend_base, max_depth);
brute_candidates.retain(|c| extract_base_domain(c) == backend_base);
for hostname in brute_candidates {
candidates.push(Candidate { hostname });
}
}
}
candidates.sort_by(|a, b| a.hostname.cmp(&b.hostname));
candidates.dedup_by(|a, b| a.hostname == b.hostname);
output.info(&format!(
"[*] Total candidates after brute force: {}",
candidates.len()
));
} else {
candidates.sort_by(|a, b| a.hostname.cmp(&b.hostname));
candidates.dedup_by(|a, b| a.hostname == b.hostname);
}
if let Some(wordlist_path) = &args.gen_wordlist_output {
output.info(&format!(
"[*] Writing {} candidates to: {}",
candidates.len(),
wordlist_path.display()
));
let mut file = std::fs::File::create(wordlist_path)
.context("Failed to create wordlist output file")?;
use std::io::Write;
for candidate in &candidates {
writeln!(file, "{}", candidate.hostname)
.context("Failed to write to wordlist output file")?;
}
output.success(&format!(
"[+] Candidate list written to: {}",
wordlist_path.display()
));
}
let timeout = std::time::Duration::from_secs(args.timeout);
output.info("[*] Probing for wildcard DNS...");
let wildcard_domains = detect_wildcard_dns(&candidates, timeout).await;
if !wildcard_domains.is_empty() {
for (domain, ip) in &wildcard_domains {
output.warn(&format!(
"[!] Wildcard DNS detected: *.{} -> {} (results for this domain will be filtered)",
domain, ip
));
}
}
output.info(&format!(
"[*] Testing {} candidates with {} concurrent workers...",
candidates.len(),
args.concurrency
));
let verify_pb = output.create_verify_progress(candidates.len() as u64);
let results = verify_candidates(candidates, args.concurrency, timeout, args.dns_only, Some(verify_pb.clone())).await;
verify_pb.finish_with_message("Verification complete");
let mut live_count = 0;
let mut wildcard_filtered = 0;
for result in &results {
if result.is_live() {
if let Some(ip) = &result.dns_ip {
let labels: Vec<&str> = result.hostname.split('.').collect();
let base = if labels.len() >= 2 {
format!("{}.{}", labels[labels.len()-2], labels[labels.len()-1])
} else {
result.hostname.clone()
};
if let Some(wildcard_ip) = wildcard_domains.get(&base) {
if ip == wildcard_ip {
wildcard_filtered += 1;
if args.verbose >= 2 {
output.debug(&format!(
"[-] WILDCARD: {} -> {} (matches *.{} wildcard)",
result.hostname, ip, base
));
}
continue;
}
}
}
live_count += 1;
output.success(&format!("[+] LIVE: {}", result.hostname));
if let Some(ip) = &result.dns_ip {
output.info(&format!(" DNS: {}", ip));
}
if let Some(https_status) = &result.https_status {
output.info(&format!(" HTTPS: {}", https_status));
}
if let Some(http_status) = &result.http_status {
output.info(&format!(" HTTP: {}", http_status));
}
} else {
if args.verbose >= 2 {
output.debug(&format!(
"[-] DEAD: {} ({})",
result.hostname,
result.error_message()
));
}
}
}
if wildcard_filtered > 0 {
output.warn(&format!(
"[!] Filtered {} wildcard false positive(s)",
wildcard_filtered
));
}
output.info(&format!(
"\nSummary: {}/{} backends discovered{}",
live_count,
results.len(),
if wildcard_filtered > 0 {
format!(" ({} wildcard false positives filtered)", wildcard_filtered)
} else {
String::new()
}
));
Ok(())
}