Skip to main content

web_analyzer/
domain_info.rs

1use regex::Regex;
2use reqwest::Client;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::time::{Duration, Instant};
6use tokio::io::{AsyncReadExt, AsyncWriteExt};
7use tokio::net::TcpStream;
8
9// ── WHOIS server database ───────────────────────────────────────────────────
10
/// Maps a top-level domain label (without the leading dot) to the WHOIS server
/// host that is queried for it.
///
/// Consulted by `get_whois_server`, which falls back to `whois.iana.org` for any
/// TLD that is not listed here.
const WHOIS_SERVERS: &[(&str, &str)] = &[
    ("com", "whois.verisign-grs.com"),
    ("net", "whois.verisign-grs.com"),
    ("org", "whois.pir.org"),
    ("info", "whois.afilias.net"),
    ("biz", "whois.biz"),
    ("us", "whois.nic.us"),
    ("uk", "whois.nic.uk"),
    ("de", "whois.denic.de"),
    ("fr", "whois.nic.fr"),
    ("it", "whois.nic.it"),
    ("nl", "whois.domain-registry.nl"),
    ("eu", "whois.eu"),
    ("ru", "whois.tcinet.ru"),
    ("cn", "whois.cnnic.cn"),
    ("jp", "whois.jprs.jp"),
    ("br", "whois.registro.br"),
    ("au", "whois.auda.org.au"),
    ("ca", "whois.cira.ca"),
    ("in", "whois.registry.in"),
    ("tr", "whois.nic.tr"),
    ("co", "whois.nic.co"),
    ("io", "whois.nic.io"),
    ("me", "whois.nic.me"),
    ("tv", "whois.nic.tv"),
    ("cc", "whois.nic.cc"),
];
38
/// TCP ports probed by `scan_ports`, each paired with the service label that
/// appears after the slash in the `port/service` result strings.
const COMMON_PORTS: &[(u16, &str)] = &[
    (21, "FTP"),
    (22, "SSH"),
    (25, "SMTP"),
    (80, "HTTP"),
    (443, "HTTPS"),
    (3306, "MySQL"),
    (5432, "PostgreSQL"),
    (8080, "HTTP-Alt"),
    (8443, "HTTPS-Alt"),
];
51
/// Response header names that `check_security` looks for on the HTTPS response.
///
/// Every header found is recorded in `SecurityInfo::security_headers` and counted
/// in `SecurityInfo::headers_count`, which feeds the security score.
const SECURITY_HEADERS: &[&str] = &[
    "strict-transport-security",
    "x-frame-options",
    "x-content-type-options",
    "x-xss-protection",
    "content-security-policy",
];
60
/// Lowercase substrings that mark a WHOIS response as privacy-protected.
///
/// `query_whois` lowercases the whole response and reports privacy protection as
/// "Active" if any of these occurs anywhere in it, "Inactive" otherwise.
const PRIVACY_KEYWORDS: &[&str] = &[
    "redacted",
    "privacy",
    "gdpr",
    "protected",
    "proxy",
    "private",
];
70
71// ── Data Structures ─────────────────────────────────────────────────────────
72
/// Aggregated report produced by `get_domain_info` for a single domain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DomainInfoResult {
    /// Host name the report is about, after normalisation by `clean_domain`.
    pub domain: String,
    /// First IPv4 address the host resolved to, if any.
    pub ipv4: Option<String>,
    /// Every IPv6 address the host resolved to.
    pub ipv6: Vec<String>,
    /// Every IPv4 address the host resolved to (`ipv4` is the first entry).
    pub all_ipv4: Vec<String>,
    /// Reverse-DNS (PTR) lookup result for `ipv4`, if one was found.
    pub reverse_dns: Option<String>,
    /// Registration data parsed from the WHOIS response.
    pub whois: WhoisInfo,
    /// TLS certificate details for port 443.
    pub ssl: SslInfo,
    /// NS / MX / TXT records plus the SPF and DMARC policies derived from them.
    pub dns: DnsInfo,
    /// Open ports among `COMMON_PORTS`, formatted as `port/service`.
    pub open_ports: Vec<String>,
    /// Status of the first successful request, formatted as `<code> - <SCHEME>`
    /// (HTTPS is tried before HTTP).
    pub http_status: Option<String>,
    /// Value of the `Server` response header, when present.
    pub web_server: Option<String>,
    /// Round-trip time of that request in milliseconds, rounded to two decimals.
    pub response_time_ms: Option<f64>,
    /// HTTPS availability, HTTP→HTTPS redirect and security-header findings.
    pub security: SecurityInfo,
    /// Score from 0 to 100 computed by `calculate_security_score`.
    pub security_score: u32,
}
90
91#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct WhoisInfo {
93    pub registrar: String,
94    pub creation_date: String,
95    pub expiry_date: String,
96    pub last_updated: String,
97    pub domain_status: Vec<String>,
98    pub registrant: String,
99    pub privacy_protection: String,
100    #[serde(skip_serializing_if = "Vec::is_empty")]
101    pub name_servers: Vec<String>,
102}
103
104#[derive(Debug, Clone, Serialize, Deserialize)]
105pub struct SslInfo {
106    pub status: String,
107    #[serde(skip_serializing_if = "Option::is_none")]
108    pub issued_to: Option<String>,
109    #[serde(skip_serializing_if = "Option::is_none")]
110    pub issuer: Option<String>,
111    #[serde(skip_serializing_if = "Option::is_none")]
112    pub protocol_version: Option<String>,
113    #[serde(skip_serializing_if = "Option::is_none")]
114    pub expiry_date: Option<String>,
115    #[serde(skip_serializing_if = "Option::is_none")]
116    pub days_until_expiry: Option<i64>,
117    #[serde(skip_serializing_if = "Vec::is_empty")]
118    pub alternative_names: Vec<String>,
119}
120
/// DNS records collected for a domain by `get_dns_records`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DnsInfo {
    /// NS record lines as printed by `dig +short`.
    pub nameservers: Vec<String>,
    /// MX record lines as printed by `dig +short`.
    pub mx_records: Vec<String>,
    /// TXT record lines as printed by `dig +short`.
    pub txt_records: Vec<String>,
    /// First TXT record containing `v=spf1`, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spf: Option<String>,
    /// First TXT record of `_dmarc.<domain>` containing `v=DMARC1`, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dmarc: Option<String>,
}
131
/// HTTP-level security findings produced by `check_security`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecurityInfo {
    /// `true` when a GET request to `https://<domain>` succeeded.
    pub https_available: bool,
    /// `true` when a GET request to `http://<domain>` ended on an `https://` URL
    /// after following redirects.
    pub https_redirect: bool,
    /// Headers from `SECURITY_HEADERS` found on the HTTPS response, name → value.
    pub security_headers: HashMap<String, String>,
    /// Number of entries recorded in `security_headers`.
    pub headers_count: usize,
}
139
140// ── Main function ───────────────────────────────────────────────────────────
141
142pub async fn get_domain_info(
143    domain: &str,
144) -> Result<DomainInfoResult, Box<dyn std::error::Error + Send + Sync>> {
145    let clean = clean_domain(domain);
146
147    let client = Client::builder()
148        .timeout(Duration::from_secs(5))
149        .danger_accept_invalid_certs(true)
150        .redirect(reqwest::redirect::Policy::limited(3))
151        .user_agent("Mozilla/5.0")
152        .build()?;
153
154    // ── IP Resolution ───────────────────────────────────────────────────
155    let (mut ipv4, mut all_ipv4, mut ipv6) = (None, vec![], vec![]);
156
157    if let Ok(addrs) = tokio::net::lookup_host(format!("{}:80", clean)).await {
158        for addr in addrs {
159            match addr.ip() {
160                std::net::IpAddr::V4(ip) => {
161                    all_ipv4.push(ip.to_string());
162                }
163                std::net::IpAddr::V6(ip) => {
164                    ipv6.push(ip.to_string());
165                }
166            }
167        }
168    }
169    if !all_ipv4.is_empty() {
170        ipv4 = Some(all_ipv4[0].clone());
171    }
172
173    // ── Reverse DNS ─────────────────────────────────────────────────────
174    let reverse_dns = if let Some(ref ip) = ipv4 {
175        reverse_dns_lookup(ip).await
176    } else {
177        None
178    };
179
180    // ── Run concurrent tasks ────────────────────────────────────────────
181    let whois_fut = query_whois(&clean);
182    let ssl_fut = check_ssl(&clean);
183    let dns_fut = get_dns_records(&clean);
184    let ports_fut = scan_ports(ipv4.as_deref());
185    let http_fut = check_http_status(&client, &clean);
186    let security_fut = check_security(&client, &clean);
187
188    let (whois, ssl, dns, open_ports, http_info, security) = tokio::join!(
189        whois_fut,
190        ssl_fut,
191        dns_fut,
192        ports_fut,
193        http_fut,
194        security_fut
195    );
196
197    // ── Security Score ──────────────────────────────────────────────────
198    let score = calculate_security_score(&ssl, &dns, &security);
199
200    Ok(DomainInfoResult {
201        domain: clean,
202        ipv4,
203        ipv6,
204        all_ipv4,
205        reverse_dns,
206        whois,
207        ssl,
208        dns,
209        open_ports,
210        http_status: http_info.0,
211        web_server: http_info.1,
212        response_time_ms: http_info.2,
213        security,
214        security_score: score,
215    })
216}
217
218// ── Domain cleaning ─────────────────────────────────────────────────────────
219
/// Reduces user input (a bare host name or a full URL) to a lowercase host name.
///
/// Steps, in order:
/// 1. surrounding whitespace is trimmed and the text is lowercased (host names
///    are case-insensitive, and the TLD table is keyed in lowercase);
/// 2. a leading `http://` or `https://` scheme is removed;
/// 3. everything from the first `/`, `?` or `#` on is dropped;
/// 4. `user:password@` credentials and a `:port` suffix are dropped;
/// 5. a trailing root dot is removed;
/// 6. a leading `www.` label is removed, but only when a dotted name remains
///    (so `www.com` is kept as is).
///
/// Only a *leading* `www.` is stripped; the text `www.` inside another label
/// (e.g. `awww.example.com`) is left alone.
fn clean_domain(domain: &str) -> String {
    let lowered = domain.trim().to_ascii_lowercase();

    let without_scheme = lowered
        .strip_prefix("https://")
        .or_else(|| lowered.strip_prefix("http://"))
        .unwrap_or(&lowered);

    // The authority part ends at the first path, query or fragment delimiter.
    let authority = without_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or("");

    // Credentials, if any, precede the last '@'.
    let host_port = authority.rsplit('@').next().unwrap_or(authority);
    let host = host_port
        .split(':')
        .next()
        .unwrap_or(host_port)
        .trim_end_matches('.');

    let host = match host.strip_prefix("www.") {
        Some(rest) if rest.contains('.') => rest,
        _ => host,
    };

    host.to_string()
}
233
234// ── Reverse DNS ─────────────────────────────────────────────────────────────
235
236async fn reverse_dns_lookup(ip: &str) -> Option<String> {
237    let output = tokio::process::Command::new("dig")
238        .args(["+short", "-x", ip])
239        .output()
240        .await
241        .ok()?;
242    let text = String::from_utf8_lossy(&output.stdout).trim().to_string();
243    if text.is_empty() {
244        None
245    } else {
246        Some(text.trim_end_matches('.').to_string())
247    }
248}
249
250// ── WHOIS via TCP socket ────────────────────────────────────────────────────
251
252fn get_whois_server(domain: &str) -> &'static str {
253    let tld = domain.split('.').next_back().unwrap_or("");
254    WHOIS_SERVERS
255        .iter()
256        .find(|(t, _)| *t == tld)
257        .map(|(_, s)| *s)
258        .unwrap_or("whois.iana.org")
259}
260
261async fn query_whois_tcp(domain: &str, server: &str) -> Option<String> {
262    let addr = format!("{}:43", server);
263    let mut stream = tokio::time::timeout(Duration::from_secs(10), TcpStream::connect(&addr))
264        .await
265        .ok()?
266        .ok()?;
267
268    stream
269        .write_all(format!("{}\r\n", domain).as_bytes())
270        .await
271        .ok()?;
272
273    let mut buf = Vec::new();
274    let _ = tokio::time::timeout(Duration::from_secs(10), stream.read_to_end(&mut buf)).await;
275
276    Some(String::from_utf8_lossy(&buf).to_string())
277}
278
279async fn query_whois(domain: &str) -> WhoisInfo {
280    let mut info = WhoisInfo {
281        registrar: "Unknown".into(),
282        creation_date: "Unknown".into(),
283        expiry_date: "Unknown".into(),
284        last_updated: "Unknown".into(),
285        domain_status: vec![],
286        registrant: "Unknown".into(),
287        privacy_protection: "Unknown".into(),
288        name_servers: vec![],
289    };
290
291    let server = get_whois_server(domain);
292    let output = match query_whois_tcp(domain, server).await {
293        Some(o) if !o.is_empty() => o,
294        _ => return info,
295    };
296
297    // Follow referral
298    let final_output = if let Some(caps) = Regex::new(r"(?i)Registrar WHOIS Server:\s*(.+)")
299        .ok()
300        .and_then(|r| r.captures(&output))
301    {
302        let referral = caps
303            .get(1)
304            .unwrap()
305            .as_str()
306            .trim()
307            .replace("whois://", "")
308            .replace("http://", "")
309            .replace("https://", "");
310        query_whois_tcp(domain, &referral).await.unwrap_or(output)
311    } else {
312        output
313    };
314
315    // Parse registrar
316    for pat in &[
317        r"(?i)Registrar:\s*(.+)",
318        r"(?i)Registrar Name:\s*(.+)",
319        r"(?i)Registrar Organization:\s*(.+)",
320    ] {
321        if let Some(m) = Regex::new(pat).ok().and_then(|r| r.captures(&final_output)) {
322            info.registrar = m.get(1).unwrap().as_str().trim().to_string();
323            break;
324        }
325    }
326
327    // Parse creation date
328    for pat in &[
329        r"(?i)Creation Date:\s*(.+)",
330        r"(?i)Created Date:\s*(.+)",
331        r"(?i)Created:\s*(.+)",
332        r"(?i)Registration Time:\s*(.+)",
333    ] {
334        if let Some(m) = Regex::new(pat).ok().and_then(|r| r.captures(&final_output)) {
335            info.creation_date = m
336                .get(1)
337                .unwrap()
338                .as_str()
339                .trim()
340                .split('\n')
341                .next()
342                .unwrap_or("")
343                .to_string();
344            break;
345        }
346    }
347
348    // Parse expiry date
349    for pat in &[
350        r"(?i)Registry Expiry Date:\s*(.+)",
351        r"(?i)Registrar Registration Expiration Date:\s*(.+)",
352        r"(?i)Expir(?:y|ation) Date:\s*(.+)",
353        r"(?i)expires:\s*(.+)",
354        r"(?i)Expiration Time:\s*(.+)",
355    ] {
356        if let Some(m) = Regex::new(pat).ok().and_then(|r| r.captures(&final_output)) {
357            info.expiry_date = m
358                .get(1)
359                .unwrap()
360                .as_str()
361                .trim()
362                .split('\n')
363                .next()
364                .unwrap_or("")
365                .to_string();
366            break;
367        }
368    }
369
370    // Parse updated date
371    for pat in &[
372        r"(?i)Updated Date:\s*(.+)",
373        r"(?i)Last Updated:\s*(.+)",
374        r"(?i)last-update:\s*(.+)",
375        r"(?i)Modified Date:\s*(.+)",
376    ] {
377        if let Some(m) = Regex::new(pat).ok().and_then(|r| r.captures(&final_output)) {
378            info.last_updated = m
379                .get(1)
380                .unwrap()
381                .as_str()
382                .trim()
383                .split('\n')
384                .next()
385                .unwrap_or("")
386                .to_string();
387            break;
388        }
389    }
390
391    // Parse domain status
392    if let Ok(rx) = Regex::new(r"(?i)(?:Domain )?Status:\s*(.+)") {
393        info.domain_status = rx
394            .captures_iter(&final_output)
395            .filter_map(|c| {
396                c.get(1).map(|m| {
397                    m.as_str()
398                        .split_whitespace()
399                        .next()
400                        .unwrap_or("")
401                        .to_string()
402                })
403            })
404            .filter(|s| !s.is_empty())
405            .take(3)
406            .collect();
407    }
408    if info.domain_status.is_empty() {
409        info.domain_status.push("Unknown".into());
410    }
411
412    // Parse registrant
413    for pat in &[
414        r"(?i)Registrant Name:\s*(.+)",
415        r"(?i)Registrant:\s*(.+)",
416        r"(?i)Registrant Organization:\s*(.+)",
417    ] {
418        if let Some(m) = Regex::new(pat).ok().and_then(|r| r.captures(&final_output)) {
419            let val = m
420                .get(1)
421                .unwrap()
422                .as_str()
423                .trim()
424                .split('\n')
425                .next()
426                .unwrap_or("")
427                .to_string();
428            if !val.is_empty() {
429                info.registrant = val;
430                break;
431            }
432        }
433    }
434
435    // Privacy protection
436    let lower = final_output.to_lowercase();
437    info.privacy_protection = if PRIVACY_KEYWORDS.iter().any(|k| lower.contains(k)) {
438        "Active".into()
439    } else {
440        "Inactive".into()
441    };
442
443    // Name servers
444    if let Ok(rx) = Regex::new(r"(?i)Name Server:\s*(.+)") {
445        info.name_servers = rx
446            .captures_iter(&final_output)
447            .filter_map(|c| c.get(1).map(|m| m.as_str().trim().to_lowercase()))
448            .take(4)
449            .collect();
450    }
451
452    info
453}
454
455// ── SSL Certificate ─────────────────────────────────────────────────────────
456
457async fn check_ssl(domain: &str) -> SslInfo {
458    // Use openssl s_client to get certificate info
459    let output = match tokio::process::Command::new("openssl")
460        .args([
461            "s_client",
462            "-connect",
463            &format!("{}:443", domain),
464            "-servername",
465            domain,
466        ])
467        .stdin(std::process::Stdio::null())
468        .stdout(std::process::Stdio::piped())
469        .stderr(std::process::Stdio::piped())
470        .output()
471        .await
472    {
473        Ok(o) => String::from_utf8_lossy(&o.stdout).to_string(),
474        Err(_) => {
475            return SslInfo {
476                status: "Error".into(),
477                issued_to: None,
478                issuer: None,
479                protocol_version: None,
480                expiry_date: None,
481                days_until_expiry: None,
482                alternative_names: vec![],
483            }
484        }
485    };
486
487    if output.contains("CONNECTED") {
488        let mut ssl = SslInfo {
489            status: "Valid".into(),
490            issued_to: None,
491            issuer: None,
492            protocol_version: None,
493            expiry_date: None,
494            days_until_expiry: None,
495            alternative_names: vec![],
496        };
497
498        // Extract subject CN
499        if let Some(m) = Regex::new(r"subject=.*?CN\s*=\s*([^\n/,]+)")
500            .ok()
501            .and_then(|r| r.captures(&output))
502        {
503            ssl.issued_to = Some(m.get(1).unwrap().as_str().trim().to_string());
504        }
505
506        // Extract issuer CN
507        if let Some(m) = Regex::new(r"issuer=.*?CN\s*=\s*([^\n/,]+)")
508            .ok()
509            .and_then(|r| r.captures(&output))
510        {
511            ssl.issuer = Some(m.get(1).unwrap().as_str().trim().to_string());
512        }
513
514        // Extract protocol
515        if let Some(m) = Regex::new(r"Protocol\s*:\s*(.+)")
516            .ok()
517            .and_then(|r| r.captures(&output))
518        {
519            ssl.protocol_version = Some(m.get(1).unwrap().as_str().trim().to_string());
520        }
521
522        // Get dates via openssl x509
523        if let Ok(cert_output) = tokio::process::Command::new("sh")
524            .args(["-c", &format!("echo | openssl s_client -connect {}:443 -servername {} 2>/dev/null | openssl x509 -noout -dates -subject -ext subjectAltName 2>/dev/null", domain, domain)])
525            .output()
526            .await
527        {
528            let cert_text = String::from_utf8_lossy(&cert_output.stdout);
529
530            if let Some(m) = Regex::new(r"notAfter=(.+)").ok().and_then(|r| r.captures(&cert_text)) {
531                let expiry_str = m.get(1).unwrap().as_str().trim().to_string();
532                ssl.expiry_date = Some(expiry_str.clone());
533
534                // Compute days_until_expiry from parsed date
535                // OpenSSL format: "Jun 15 12:00:00 2025 GMT"
536                if let Ok(expiry) = chrono::NaiveDateTime::parse_from_str(
537                    expiry_str.trim_end_matches(" GMT").trim_end_matches(" UTC"),
538                    "%b %d %H:%M:%S %Y",
539                ) {
540                    let now = chrono::Utc::now().naive_utc();
541                    ssl.days_until_expiry = Some((expiry - now).num_days());
542                }
543            }
544
545            // Extract SANs
546            if let Some(san_section) = cert_text.split("X509v3 Subject Alternative Name:").nth(1) {
547                let names: Vec<String> = Regex::new(r"DNS:([^,\s]+)")
548                    .ok()
549                    .map(|r| r.captures_iter(san_section).filter_map(|c| c.get(1).map(|m| m.as_str().to_string())).take(5).collect())
550                    .unwrap_or_default();
551                ssl.alternative_names = names;
552            }
553        }
554
555        ssl
556    } else {
557        SslInfo {
558            status: "HTTPS not available".into(),
559            issued_to: None,
560            issuer: None,
561            protocol_version: None,
562            expiry_date: None,
563            days_until_expiry: None,
564            alternative_names: vec![],
565        }
566    }
567}
568
569// ── DNS Records via dig ─────────────────────────────────────────────────────
570
571async fn dig_query(domain: &str, rtype: &str) -> Vec<String> {
572    tokio::process::Command::new("dig")
573        .args(["+short", rtype, domain])
574        .output()
575        .await
576        .ok()
577        .and_then(|o| String::from_utf8(o.stdout).ok())
578        .map(|t| {
579            t.lines()
580                .filter(|l| !l.trim().is_empty() && !l.starts_with(';'))
581                .map(|l| l.trim().to_string())
582                .collect()
583        })
584        .unwrap_or_default()
585}
586
587async fn get_dns_records(domain: &str) -> DnsInfo {
588    let (ns, mx, txt) = tokio::join!(
589        dig_query(domain, "NS"),
590        dig_query(domain, "MX"),
591        dig_query(domain, "TXT"),
592    );
593
594    let spf = txt.iter().find(|t| t.contains("v=spf1")).cloned();
595    let dmarc_records = dig_query(&format!("_dmarc.{}", domain), "TXT").await;
596    let dmarc = dmarc_records.into_iter().find(|t| t.contains("v=DMARC1"));
597
598    DnsInfo {
599        nameservers: ns,
600        mx_records: mx,
601        txt_records: txt,
602        spf,
603        dmarc,
604    }
605}
606
607// ── Port Scanning ───────────────────────────────────────────────────────────
608
609async fn scan_ports(ip: Option<&str>) -> Vec<String> {
610    let ip = match ip {
611        Some(ip) => ip,
612        None => return vec![],
613    };
614
615    let mut results = Vec::new();
616    let mut handles = Vec::new();
617
618    for &(port, service) in COMMON_PORTS {
619        let addr = format!("{}:{}", ip, port);
620        handles.push(tokio::spawn(async move {
621            match tokio::time::timeout(Duration::from_secs(1), TcpStream::connect(&addr)).await {
622                Ok(Ok(_)) => Some(format!("{}/{}", port, service)),
623                _ => None,
624            }
625        }));
626    }
627
628    for handle in handles {
629        if let Ok(Some(port_str)) = handle.await {
630            results.push(port_str);
631        }
632    }
633
634    results.sort();
635    results
636}
637
638// ── HTTP Status Check ───────────────────────────────────────────────────────
639
640async fn check_http_status(
641    client: &Client,
642    domain: &str,
643) -> (Option<String>, Option<String>, Option<f64>) {
644    for proto in &["https", "http"] {
645        let url = format!("{}://{}", proto, domain);
646        let start = Instant::now();
647        match client.get(&url).send().await {
648            Ok(resp) => {
649                let elapsed = start.elapsed().as_secs_f64() * 1000.0;
650                let status_str = format!("{} - {}", resp.status().as_u16(), proto.to_uppercase());
651                let server = resp
652                    .headers()
653                    .get("server")
654                    .and_then(|v| v.to_str().ok())
655                    .map(|s| s.to_string());
656                return (
657                    Some(status_str),
658                    server,
659                    Some((elapsed * 100.0).round() / 100.0),
660                );
661            }
662            Err(_) => continue,
663        }
664    }
665    (None, None, None)
666}
667
668// ── Security Check ──────────────────────────────────────────────────────────
669
670async fn check_security(client: &Client, domain: &str) -> SecurityInfo {
671    let mut sec = SecurityInfo {
672        https_available: false,
673        https_redirect: false,
674        security_headers: HashMap::new(),
675        headers_count: 0,
676    };
677
678    // HTTPS + security headers
679    if let Ok(resp) = client.get(format!("https://{}", domain)).send().await {
680        sec.https_available = true;
681        for header in SECURITY_HEADERS {
682            if let Some(val) = resp.headers().get(*header) {
683                if let Ok(v) = val.to_str() {
684                    sec.security_headers
685                        .insert(header.to_string(), v.to_string());
686                    sec.headers_count += 1;
687                }
688            }
689        }
690    }
691
692    // HTTP → HTTPS redirect
693    if let Ok(resp) = client.get(format!("http://{}", domain)).send().await {
694        let final_url = resp.url().to_string();
695        if final_url.starts_with("https://") {
696            sec.https_redirect = true;
697        }
698    }
699
700    sec
701}
702
703// ── Security Score (0-100) ──────────────────────────────────────────────────
704
705fn calculate_security_score(ssl: &SslInfo, dns: &DnsInfo, security: &SecurityInfo) -> u32 {
706    let mut score: u32 = 0;
707
708    // HTTPS available (+30)
709    if security.https_available {
710        score += 30;
711    }
712
713    // HTTPS redirect (+10)
714    if security.https_redirect {
715        score += 10;
716    }
717
718    // SSL valid (+20)
719    if ssl.status == "Valid" {
720        score += 20;
721    }
722
723    // Security headers (up to +20, 4 points each)
724    score += (security.headers_count as u32 * 4).min(20);
725
726    // SPF record (+10)
727    if dns.spf.is_some() {
728        score += 10;
729    }
730
731    // DMARC record (+10)
732    if dns.dmarc.is_some() {
733        score += 10;
734    }
735
736    score
737}