Skip to main content

web_analyzer/
subdomain_discovery.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashSet;
3use std::time::Instant;
4use tokio::process::Command;
5use std::process::Stdio;
6use tokio::io::{AsyncBufReadExt, BufReader};
7
/// Hostname fragments that mark a discovered host as noise.
///
/// `should_skip` lower-cases a hostname and rejects it when it *contains* any
/// of these fragments, so every entry must be written in lowercase. Entries
/// are substrings, not exact hostnames or suffixes.
const SKIP_PATTERNS: &[&str] = &[
    // Google-owned endpoints and the Azure / Cloudflare shared hostnames.
    "stun.l.google.com",
    ".cloudapp.azure.com",
    "clients6.google.com",
    ".cdn.cloudflare.net",
    // Numbered `rrN.sn-` host prefixes.
    "rr1.sn-", "rr2.sn-", "rr3.sn-", "rr4.sn-", "rr5.sn-",
    // `msedge` hostnames.
    "e-0014.e-msedge",
    "s-part-",
    ".t-msedge.net",
    // Assorted third-party service hosts.
    "perimeterx.map",
    "i.ytimg.com",
    "analytics-alv.google.com",
    "signaler-pa.clients",
    "westus-0.in.applicationinsights",
];
28
/// Two-label suffixes (`<second-level>.<country-code>`) that count as a single
/// TLD when `is_subdomain` decides how many labels make up a root domain.
///
/// This is a short hand-maintained list, not a complete public-suffix list.
/// Entries are lowercase.
const COMMON_TLDS: &[&str] = &[
    // United Kingdom
    "co.uk",
    // Turkey
    "com.tr", "gov.tr", "edu.tr", "org.tr", "net.tr",
    // Japan, South Korea, Indonesia, India
    "co.jp", "co.kr", "co.id", "co.in",
    // Brazil, Australia
    "com.br", "com.au",
];
34
35// ── Data Structures ─────────────────────────────────────────────────────────
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct SubdomainDetail {
39    pub host: String,
40    pub status: Option<u16>,
41    pub resolution_error: Option<String>,
42}
43
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct SubdomainDiscoveryResult {
46    pub domain: String,
47    pub subdomains: Vec<SubdomainDetail>,
48    pub total_found: usize,
49    pub filtered_count: usize,
50    pub response_time_ms: u128,
51}
52
53// ── Public API ──────────────────────────────────────────────────────────────
54
55pub async fn discover_subdomains(
56    domain: &str,
57    progress_tx: Option<tokio::sync::mpsc::Sender<crate::ScanProgress>>
58) -> Result<SubdomainDiscoveryResult, Box<dyn std::error::Error + Send + Sync>> {
59    let start_time = Instant::now();
60
61    if let Some(tx) = &progress_tx {
62        let _ = tx.send(crate::ScanProgress {
63            module: "Subdomain".to_string(),
64            percentage: 10.0,
65            message: "Spawning high-concurrency subfinder process...".to_string(),
66            status: "ongoing".to_string()
67        }).await;
68    }
69
70    let mut command = Command::new("subfinder");
71    command.arg("-d").arg(domain).arg("-silent");
72    
73    command.stdout(Stdio::piped());
74    command.stderr(Stdio::null());
75
76    let mut child = command.spawn()?;
77    
78    let stdout = child.stdout.take().expect("Failed to capture stdout");
79    let mut reader = BufReader::new(stdout).lines();
80
81    let mut seen = HashSet::new();
82    let mut raw = Vec::new();
83    
84    let mut total_found = 0;
85
86    while let Some(line) = reader.next_line().await? {
87        let s = line.trim().to_lowercase();
88        if !s.is_empty() && seen.insert(s.clone()) {
89            raw.push(s.clone());
90            total_found += 1;
91            
92            if total_found % 20 == 0 {
93                if let Some(tx) = &progress_tx {
94                    let _ = tx.send(crate::ScanProgress {
95                        module: "Subdomain".to_string(),
96                        percentage: 50.0,
97                        message: format!("Discovered {} subdomains so far... [Latest: {}]", total_found, s),
98                        status: "ongoing".to_string()
99                    }).await;
100                }
101            }
102        }
103    }
104
105    child.wait().await?;
106
107    if let Some(tx) = &progress_tx {
108        let _ = tx.send(crate::ScanProgress {
109            module: "Subdomain".to_string(),
110            percentage: 90.0,
111            message: "Filtering noise and matching results against blocklists...".to_string(),
112            status: "ongoing".to_string()
113        }).await;
114    }
115
116    let raw_subdomains: Vec<String> = raw.into_iter().filter(|s| !should_skip(s)).collect();
117    let filtered_count = total_found - raw_subdomains.len();
118    
119    if let Some(tx) = &progress_tx {
120        let _ = tx.send(crate::ScanProgress {
121            module: "Subdomain".to_string(),
122            percentage: 92.0,
123            message: format!("Resolving HTTP status for {} unique subdomains...", raw_subdomains.len()),
124            status: "ongoing".to_string()
125        }).await;
126    }
127
128    use tokio::task::JoinSet;
129    use tokio::sync::Semaphore;
130    use std::sync::Arc;
131
132    let mut set = JoinSet::new();
133    let client = reqwest::Client::builder()
134        .timeout(std::time::Duration::from_secs(5))
135        .danger_accept_invalid_certs(true)
136        .redirect(reqwest::redirect::Policy::limited(3))
137        .build()
138        .unwrap_or_default();
139        
140    let semaphore = Arc::new(Semaphore::new(100));
141
142    for host in raw_subdomains.clone() {
143        let client_c = client.clone();
144        let sem_c = semaphore.clone();
145        set.spawn(async move {
146            let _permit = sem_c.acquire().await;
147            
148            // Probing HTTP -> HTTPS
149            let url_http = format!("http://{}", host);
150            match client_c.get(&url_http).send().await {
151                Ok(r) => {
152                    SubdomainDetail {
153                        host,
154                        status: Some(r.status().as_u16()),
155                        resolution_error: None,
156                    }
157                },
158                Err(e_http) => {
159                    // Try HTTPS if HTTP completely drops
160                    let url_https = format!("https://{}", host);
161                    match client_c.get(&url_https).send().await {
162                        Ok(r) => {
163                            SubdomainDetail {
164                                host,
165                                status: Some(r.status().as_u16()),
166                                resolution_error: None,
167                            }
168                        },
169                        Err(e_https) => {
170                            SubdomainDetail {
171                                host,
172                                status: None,
173                                resolution_error: Some(format!("HTTP: {} | HTTPS: {}", e_http, e_https)),
174                            }
175                        }
176                    }
177                }
178            }
179        });
180    }
181
182    let mut subdomains = Vec::new();
183    let total_to_resolve = raw_subdomains.len();
184    let mut resolved = 0;
185
186    while let Some(res) = set.join_next().await {
187        if let Ok(detail) = res {
188            subdomains.push(detail);
189            resolved += 1;
190            
191            if resolved % 25 == 0 {
192                if let Some(tx) = &progress_tx {
193                    let _ = tx.send(crate::ScanProgress {
194                        module: "Subdomain".to_string(),
195                        percentage: 92.0 + (7.0 * (resolved as f32 / total_to_resolve as f32).max(0.01)),
196                        message: format!("Resolved HTTP status for {}/{} subdomains...", resolved, total_to_resolve),
197                        status: "ongoing".to_string()
198                    }).await;
199                }
200            }
201        }
202    }
203
204    let duration = start_time.elapsed().as_millis();
205
206    if let Some(tx) = &progress_tx {
207        let _ = tx.send(crate::ScanProgress {
208            module: "Subdomain".to_string(),
209            percentage: 100.0,
210            message: "Subdomain footprint mapping and HTTP verification completed.".to_string(),
211            status: "completed".to_string()
212        }).await;
213    }
214
215    Ok(SubdomainDiscoveryResult {
216        domain: domain.to_string(),
217        subdomains,
218        total_found,
219        filtered_count,
220        response_time_ms: duration,
221    })
222}
223
224// ── Helpers ─────────────────────────────────────────────────────────────────
225
226/// Check if a domain matches any skip pattern
227fn should_skip(domain: &str) -> bool {
228    let lower = domain.to_lowercase();
229    SKIP_PATTERNS.iter().any(|p| lower.contains(p))
230}
231
232/// Detect whether a domain is a subdomain (not the root)
233pub fn is_subdomain(domain: &str) -> bool {
234    let parts: Vec<&str> = domain.split('.').collect();
235
236    // IP address check
237    if parts.iter().all(|p| p.parse::<u8>().is_ok()) || domain.contains(':') {
238        return false;
239    }
240
241    if parts.len() <= 2 {
242        return false;
243    }
244
245    // Check multi-part TLDs
246    let suffix = format!("{}.{}", parts[parts.len() - 2], parts[parts.len() - 1]);
247    if COMMON_TLDS.contains(&suffix.as_str()) {
248        return parts.len() > 3;
249    }
250
251    parts.len() > 2
252}