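//! web_analyzer/domain_validator.rs
//!
//! Domain validation pipeline: skip check, DNS lookup, HTTP(S) reachability
//! and SSL probing, for single domains or bulk lists.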

use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::net::IpAddr;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, OnceLock};
use std::time::{Duration, Instant};
use tokio::process::Command;

// ── Constants ───────────────────────────────────────────────────────────────

/// Substrings that cause a domain to be skipped instead of validated.
///
/// A domain is skipped when its lowercased form contains any of these
/// fragments anywhere in the name.
const SKIP_PATTERNS: &[&str] = &[
    "stun.l.google.com",
    ".cloudapp.azure.com",
    "clients6.google.com",
    ".cdn.cloudflare.net",
    "rr1.sn-",
    "rr2.sn-",
    "rr3.sn-",
    "rr4.sn-",
    "rr5.sn-",
    "e-0014.e-msedge",
    "s-part-",
    ".t-msedge.net",
    "perimeterx.map",
    "i.ytimg.com",
    "analytics-alv.google.com",
    "signaler-pa.clients",
    "westus-0.in.applicationinsights",
];

/// Host fragments identifying loopback, unspecified and private-range targets.
///
/// Entries ending in `.` are IPv4 prefixes (`192.168.`, `10.`, and
/// `172.16.` through `172.31.`); the rest are complete host names or
/// addresses.
const INTERNAL_PATTERNS: &[&str] = &[
    "localhost",
    "127.0.0.1",
    "0.0.0.0",
    "192.168.",
    "10.",
    "172.16.",
    "172.17.",
    "172.18.",
    "172.19.",
    "172.20.",
    "172.21.",
    "172.22.",
    "172.23.",
    "172.24.",
    "172.25.",
    "172.26.",
    "172.27.",
    "172.28.",
    "172.29.",
    "172.30.",
    "172.31.",
];

55// ── Data Structures ─────────────────────────────────────────────────────────
56
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct ValidationResult {
59    pub domain: String,
60    pub valid: bool,
61    pub skip_reason: Option<String>,
62    pub dns_valid: bool,
63    pub http_valid: bool,
64    pub ssl_valid: bool,
65    pub dns_info: Option<DnsValidation>,
66    pub http_info: Option<HttpValidation>,
67    pub ssl_info: Option<SslValidation>,
68    pub errors: Vec<String>,
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct DnsValidation {
73    pub ip_addresses: Vec<String>,
74    pub mx_exists: bool,
75}
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct HttpValidation {
79    pub http_reachable: bool,
80    pub https_reachable: bool,
81    pub http_status: Option<u16>,
82    pub https_status: Option<u16>,
83    pub redirects_to_https: bool,
84    pub response_time_ms: u128,
85}
86
87#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct SslValidation {
89    pub ssl_available: bool,
90    pub protocol_version: String,
91    pub cipher_suite: String,
92}
93
94#[derive(Debug, Clone, Serialize, Deserialize)]
95pub struct ValidationStats {
96    pub total: usize,
97    pub valid: usize,
98    pub invalid: usize,
99    pub skipped: usize,
100    pub dns_failed: usize,
101    pub http_failed: usize,
102    pub ssl_failed: usize,
103    pub success_rate: f64,
104    pub processing_time_secs: f64,
105    pub domains_per_sec: f64,
106}
107
108#[derive(Debug, Clone, Serialize, Deserialize)]
109pub struct BulkValidationResult {
110    pub stats: ValidationStats,
111    pub valid_domains: Vec<String>,
112    pub results: Vec<ValidationResult>,
113}
114
// ── Shared Counters ─────────────────────────────────────────────────────────

/// Lock-free tallies shared between concurrently running validation tasks.
#[derive(Default)]
struct AtomicStats {
    valid: AtomicUsize,
    invalid: AtomicUsize,
    skipped: AtomicUsize,
    dns_failed: AtomicUsize,
    http_failed: AtomicUsize,
    ssl_failed: AtomicUsize,
}

impl AtomicStats {
    /// Create a set of counters that all start at zero.
    fn new() -> Self {
        // `AtomicUsize::default()` is zero, so the derived `Default` is
        // exactly the all-zero state.
        Self::default()
    }
}

139// ── Public API ──────────────────────────────────────────────────────────────
140
141/// Validate a single domain comprehensively (DNS → HTTP → SSL)
142pub async fn validate_domain(domain: &str) -> ValidationResult {
143    let client = Client::builder()
144        .timeout(Duration::from_secs(10))
145        .danger_accept_invalid_certs(true)
146        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
147        .redirect(reqwest::redirect::Policy::limited(5))
148        .build()
149        .unwrap_or_else(|_| Client::new());
150
151    validate_single(&client, domain).await
152}
153
154/// Validate multiple domains in parallel with configurable concurrency
155pub async fn validate_domains_bulk(
156    domains: &[String],
157    max_concurrency: usize,
158) -> BulkValidationResult {
159    let start = Instant::now();
160    let total = domains.len();
161
162    let client = Client::builder()
163        .timeout(Duration::from_secs(10))
164        .danger_accept_invalid_certs(true)
165        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
166        .redirect(reqwest::redirect::Policy::limited(5))
167        .pool_max_idle_per_host(max_concurrency)
168        .build()
169        .unwrap_or_else(|_| Client::new());
170
171    let stats = Arc::new(AtomicStats::new());
172    let semaphore = Arc::new(tokio::sync::Semaphore::new(max_concurrency));
173
174    let mut handles = Vec::with_capacity(total);
175
176    for domain in domains {
177        let client = client.clone();
178        let domain = domain.clone();
179        let stats = Arc::clone(&stats);
180        let sem = Arc::clone(&semaphore);
181
182        handles.push(tokio::spawn(async move {
183            let _permit = sem.acquire().await.unwrap();
184            let result = validate_single(&client, &domain).await;
185
186            // Update counters
187            if result.skip_reason.is_some() {
188                stats.skipped.fetch_add(1, Ordering::Relaxed);
189            } else if result.valid {
190                stats.valid.fetch_add(1, Ordering::Relaxed);
191            } else {
192                stats.invalid.fetch_add(1, Ordering::Relaxed);
193                if !result.dns_valid {
194                    stats.dns_failed.fetch_add(1, Ordering::Relaxed);
195                }
196                if !result.http_valid && result.dns_valid {
197                    stats.http_failed.fetch_add(1, Ordering::Relaxed);
198                }
199                if !result.ssl_valid && result.dns_valid {
200                    stats.ssl_failed.fetch_add(1, Ordering::Relaxed);
201                }
202            }
203
204            result
205        }));
206    }
207
208    // Collect results
209    let mut results = Vec::with_capacity(total);
210    for handle in handles {
211        if let Ok(result) = handle.await {
212            results.push(result);
213        }
214    }
215
216    let elapsed = start.elapsed().as_secs_f64();
217    let valid_count = stats.valid.load(Ordering::Relaxed);
218
219    let valid_domains: Vec<String> = results
220        .iter()
221        .filter(|r| r.valid)
222        .map(|r| r.domain.clone())
223        .collect();
224
225    BulkValidationResult {
226        stats: ValidationStats {
227            total,
228            valid: valid_count,
229            invalid: stats.invalid.load(Ordering::Relaxed),
230            skipped: stats.skipped.load(Ordering::Relaxed),
231            dns_failed: stats.dns_failed.load(Ordering::Relaxed),
232            http_failed: stats.http_failed.load(Ordering::Relaxed),
233            ssl_failed: stats.ssl_failed.load(Ordering::Relaxed),
234            success_rate: if total > 0 {
235                (valid_count as f64 / total as f64) * 100.0
236            } else {
237                0.0
238            },
239            processing_time_secs: elapsed,
240            domains_per_sec: if elapsed > 0.0 {
241                total as f64 / elapsed
242            } else {
243                0.0
244            },
245        },
246        valid_domains,
247        results,
248    }
249}
250
251// ── Single Domain Validation ────────────────────────────────────────────────
252
253async fn validate_single(client: &Client, domain: &str) -> ValidationResult {
254    let mut result = ValidationResult {
255        domain: domain.to_string(),
256        valid: false,
257        skip_reason: None,
258        dns_valid: false,
259        http_valid: false,
260        ssl_valid: false,
261        dns_info: None,
262        http_info: None,
263        ssl_info: None,
264        errors: vec![],
265    };
266
267    // 1. Skip check
268    if let Some(reason) = should_skip(domain) {
269        result.skip_reason = Some(reason);
270        return result;
271    }
272
273    // 2. DNS validation
274    match validate_dns(domain).await {
275        Ok(dns) => {
276            result.dns_valid = true;
277            result.dns_info = Some(dns);
278        }
279        Err(e) => {
280            result.errors.push(format!("DNS: {}", e));
281            return result; // Skip HTTP/SSL if DNS fails
282        }
283    }
284
285    // 3. HTTP validation
286    match validate_http(client, domain).await {
287        Ok(http) => {
288            result.http_valid = http.http_reachable || http.https_reachable;
289            if !result.http_valid {
290                result
291                    .errors
292                    .push("HTTP: No HTTP/HTTPS connectivity".into());
293            }
294            result.http_info = Some(http);
295        }
296        Err(e) => {
297            result.errors.push(format!("HTTP: {}", e));
298        }
299    }
300
301    // 4. SSL validation
302    match validate_ssl(domain).await {
303        Ok(ssl) => {
304            result.ssl_valid = ssl.ssl_available;
305            result.ssl_info = Some(ssl);
306        }
307        Err(e) => {
308            result.errors.push(format!("SSL: {}", e));
309        }
310    }
311
312    // Overall: valid if DNS + HTTP pass
313    result.valid = result.dns_valid && result.http_valid;
314    result
315}
316
317// ── Skip Check ──────────────────────────────────────────────────────────────
318
319fn should_skip(domain: &str) -> Option<String> {
320    let lower = domain.to_lowercase();
321
322    // Skip patterns
323    for &pattern in SKIP_PATTERNS {
324        if lower.contains(pattern) {
325            return Some(format!("Matches skip pattern: {}", pattern));
326        }
327    }
328
329    // IP address
330    if domain.parse::<IpAddr>().is_ok() {
331        return Some("IP address detected".into());
332    }
333
334    // Internal/localhost
335    for &internal in INTERNAL_PATTERNS {
336        if lower.contains(internal) {
337            return Some("Internal/localhost domain".into());
338        }
339    }
340
341    // Length
342    if domain.len() < 4 || domain.len() > 253 {
343        return Some("Invalid domain length".into());
344    }
345
346    // Must contain at least one dot
347    if !domain.contains('.') {
348        return Some("No TLD detected".into());
349    }
350
351    None
352}
353
354// ── DNS Validation ──────────────────────────────────────────────────────────
355
356async fn validate_dns(domain: &str) -> Result<DnsValidation, String> {
357    // Resolve A records via dig
358    let a_output = Command::new("dig")
359        .args(["+short", "A", domain])
360        .output()
361        .await
362        .map_err(|e| format!("dig failed: {}", e))?;
363
364    let a_records: Vec<String> = String::from_utf8_lossy(&a_output.stdout)
365        .lines()
366        .map(|s| s.trim().to_string())
367        .filter(|s| !s.is_empty() && s.parse::<IpAddr>().is_ok())
368        .collect();
369
370    if a_records.is_empty() {
371        return Err("No A records found (NXDOMAIN or empty)".into());
372    }
373
374    // Check MX records
375    let mx_output = Command::new("dig")
376        .args(["+short", "MX", domain])
377        .output()
378        .await
379        .unwrap_or_else(|_| std::process::Output {
380            status: std::process::ExitStatus::default(),
381            stdout: vec![],
382            stderr: vec![],
383        });
384
385    let mx_exists = !String::from_utf8_lossy(&mx_output.stdout).trim().is_empty();
386
387    Ok(DnsValidation {
388        ip_addresses: a_records,
389        mx_exists,
390    })
391}
392
393// ── HTTP Validation ─────────────────────────────────────────────────────────
394
395async fn validate_http(client: &Client, domain: &str) -> Result<HttpValidation, String> {
396    let mut info = HttpValidation {
397        http_reachable: false,
398        https_reachable: false,
399        http_status: None,
400        https_status: None,
401        redirects_to_https: false,
402        response_time_ms: 0,
403    };
404
405    let start = Instant::now();
406
407    // HTTPS first (more common)
408    if let Ok(resp) = client.head(format!("https://{}", domain)).send().await {
409        info.https_reachable = true;
410        info.https_status = Some(resp.status().as_u16());
411        info.response_time_ms = start.elapsed().as_millis();
412
413        if resp.status().as_u16() < 500 {
414            return Ok(info);
415        }
416    }
417
418    // HTTP fallback
419    // Build a separate client that doesn't follow redirects for HTTP check
420    let no_redirect_client = Client::builder()
421        .timeout(Duration::from_secs(8))
422        .redirect(reqwest::redirect::Policy::none())
423        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
424        .build()
425        .unwrap_or_else(|_| Client::new());
426
427    if let Ok(resp) = no_redirect_client
428        .head(format!("http://{}", domain))
429        .send()
430        .await {
431        info.http_reachable = true;
432        info.http_status = Some(resp.status().as_u16());
433
434        // Check for HTTPS redirect
435        let status = resp.status().as_u16();
436        if [301, 302, 307, 308].contains(&status) {
437            if let Some(location) = resp.headers().get("location") {
438                if let Ok(loc) = location.to_str() {
439                    if loc.starts_with("https://") {
440                        info.redirects_to_https = true;
441                    }
442                }
443            }
444        }
445    }
446
447    if info.response_time_ms == 0 {
448        info.response_time_ms = start.elapsed().as_millis();
449    }
450
451    Ok(info)
452}
453
454// ── SSL Validation ──────────────────────────────────────────────────────────
455
456async fn validate_ssl(domain: &str) -> Result<SslValidation, String> {
457    let output = Command::new("openssl")
458        .args([
459            "s_client",
460            "-connect",
461            &format!("{}:443", domain),
462            "-servername",
463            domain,
464            "-brief",
465        ])
466        .stdin(std::process::Stdio::null())
467        .stdout(std::process::Stdio::piped())
468        .stderr(std::process::Stdio::piped())
469        .output()
470        .await
471        .map_err(|e| format!("openssl failed: {}", e))?;
472
473    let stderr = String::from_utf8_lossy(&output.stderr);
474    let stdout = String::from_utf8_lossy(&output.stdout);
475    let combined = format!("{}\n{}", stdout, stderr);
476
477    if combined.contains("CONNECTION ESTABLISHED") || combined.contains("Protocol") {
478        // Extract protocol version
479        let protocol = combined
480            .lines()
481            .find(|l| l.contains("Protocol version:") || l.starts_with("Protocol"))
482            .and_then(|l| l.split(':').nth(1))
483            .map(|s| s.trim().to_string())
484            .unwrap_or_else(|| "Unknown".into());
485
486        // Extract cipher
487        let cipher = combined
488            .lines()
489            .find(|l| l.contains("Ciphersuite:") || l.contains("Cipher"))
490            .and_then(|l| l.split(':').nth(1))
491            .map(|s| s.trim().to_string())
492            .unwrap_or_else(|| "Unknown".into());
493
494        Ok(SslValidation {
495            ssl_available: true,
496            protocol_version: protocol,
497            cipher_suite: cipher,
498        })
499    } else if output.status.success() || combined.contains("Verify") {
500        // openssl connected even without -brief details
501        Ok(SslValidation {
502            ssl_available: true,
503            protocol_version: "TLS".into(),
504            cipher_suite: "Unknown".into(),
505        })
506    } else {
507        Err("SSL connection failed".to_string())
508    }
509}