1use crate::error::{Error, Result};
6use regex::Regex;
7use reqwest::Client;
8use scraper::{Html, Selector};
9use serde::Deserialize;
10use std::collections::HashSet;
11use std::net::{IpAddr, ToSocketAddrs};
12use std::time::Duration;
13use url::Url;
14
/// `User-Agent` header value configured on the scanner's HTTP client.
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

/// Request timeout, in seconds, configured on the scanner's HTTP client.
const TIMEOUT_SECS: u64 = 30;

/// Base URL used for the WordPress.org core, plugin and theme version lookups.
const WP_API_BASE: &str = "https://api.wordpress.org";

/// Path probed on the target to look for a WordPress REST API index.
const WP_JSON_PATH: &str = "/wp-json/";
/// Path fetched on the target to look for a version marker in the feed.
const WP_FEED_PATH: &str = "/feed/";
/// Path fetched on the target to look for a version marker in the readme page.
const WP_README_PATH: &str = "/readme.html";

/// Cookie-name prefixes treated as evidence of WordPress.
const WP_COOKIE_PREFIXES: &[&str] = &["wordpress_", "wp-"];
/// Exact cookie name treated as evidence of WordPress.
const WP_LANG_COOKIE: &str = "wp_lang";

/// Directory names under the plugins path that are not reported as plugins.
const SKIP_PLUGIN_SLUGS: &[&str] = &["index", "cache"];

/// URL schemes a scan target may use.
const ALLOWED_SCHEMES: &[&str] = &["http", "https"];
38
39#[derive(Debug, Clone)]
41pub struct ScanResult {
42 pub url: Url,
44 pub wordpress_detected: bool,
46 pub wordpress_version: Option<String>,
48 pub wordpress_latest: Option<String>,
50 pub theme: Option<ThemeInfo>,
52 pub plugins: Vec<PluginInfo>,
54}
55
/// A theme referenced by the scanned page.
#[derive(Debug, Clone)]
pub struct ThemeInfo {
    /// Directory name found after `/wp-content/themes/`.
    pub slug: String,
    /// Version taken from the stylesheet URL's `ver` parameter, when present.
    pub version: Option<String>,
    /// Version reported by the WordPress.org theme API for this slug, when available.
    pub latest_version: Option<String>,
}
66
/// A plugin referenced by the scanned page.
#[derive(Debug, Clone)]
pub struct PluginInfo {
    /// Directory name found after `/wp-content/plugins/` (or `mu-plugins/`).
    pub slug: String,
    /// Version taken from a `ver` query parameter on one of the plugin's asset URLs, when present.
    pub version: Option<String>,
    /// Version reported by the WordPress.org plugin API for this slug, when available.
    pub latest_version: Option<String>,
}
77
78#[derive(Debug, Deserialize)]
80struct PluginApiResponse {
81 version: Option<String>,
82}
83
84#[derive(Debug, Deserialize)]
86struct ThemeApiResponse {
87 version: Option<String>,
88}
89
90#[derive(Debug, Deserialize)]
92struct WpVersionResponse {
93 offers: Vec<WpVersionOffer>,
94}
95
96#[derive(Debug, Deserialize)]
97struct WpVersionOffer {
98 version: String,
99}
100
101#[derive(Debug, Deserialize)]
103struct WpJsonResponse {
104 name: Option<String>,
106 url: Option<String>,
108 namespaces: Option<Vec<String>>,
110}
111
112#[derive(Debug)]
114pub struct Scanner {
115 client: Client,
116 base_url: Url,
117}
118
/// Step-by-step configuration for a [`Scanner`].
#[derive(Debug)]
pub struct ScannerBuilder {
    /// Target URL exactly as supplied by the caller (not yet parsed).
    url: String,
    /// When `true`, the localhost / internal-address check is skipped.
    allow_private: bool,
}
125
126impl ScannerBuilder {
127 pub fn new(url: &str) -> Self {
129 Self {
130 url: url.to_string(),
131 allow_private: false,
132 }
133 }
134
135 pub fn allow_private(mut self, allow: bool) -> Self {
140 self.allow_private = allow;
141 self
142 }
143
144 pub fn build(self) -> Result<Scanner> {
146 Scanner::build_internal(&self.url, self.allow_private)
147 }
148}
149
150impl Scanner {
151 pub fn new(url: &str) -> Result<Self> {
156 Self::build_internal(url, false)
157 }
158
159 pub fn builder(url: &str) -> ScannerBuilder {
172 ScannerBuilder::new(url)
173 }
174
175 fn build_internal(url: &str, allow_private: bool) -> Result<Self> {
177 let url_with_scheme = if !url.contains("://") {
179 format!("https://{}", url)
180 } else {
181 url.to_string()
182 };
183
184 let base_url =
185 Url::parse(&url_with_scheme).map_err(|e| Error::InvalidUrl(e.to_string()))?;
186
187 if !ALLOWED_SCHEMES.contains(&base_url.scheme()) {
189 return Err(Error::InvalidUrl(format!(
190 "scheme '{}' not allowed (use http or https)",
191 base_url.scheme()
192 )));
193 }
194
195 if !allow_private {
197 Self::validate_host(&base_url)?;
198 }
199
200 let client = Client::builder()
201 .user_agent(USER_AGENT)
202 .timeout(Duration::from_secs(TIMEOUT_SECS))
203 .danger_accept_invalid_certs(false)
204 .build()
205 .map_err(|e| Error::HttpClient(e.to_string()))?;
206
207 Ok(Self { client, base_url })
208 }
209
210 fn validate_host(url: &Url) -> Result<()> {
212 let host = url
213 .host_str()
214 .ok_or_else(|| Error::InvalidUrl("missing host".to_string()))?;
215
216 if host == "localhost" || host.ends_with(".localhost") {
218 return Err(Error::InvalidUrl("localhost not allowed".to_string()));
219 }
220
221 let port = url
223 .port()
224 .unwrap_or(if url.scheme() == "https" { 443 } else { 80 });
225 let socket_addr = format!("{}:{}", host, port);
226
227 if let Ok(addrs) = socket_addr.to_socket_addrs() {
228 for addr in addrs {
229 if Self::is_internal_ip(addr.ip()) {
230 return Err(Error::InvalidUrl(format!(
231 "internal/private IP address not allowed: {}",
232 addr.ip()
233 )));
234 }
235 }
236 }
237
238 Ok(())
239 }
240
/// Returns `true` when `ip` must not be scanned because it is a loopback,
/// private, link-local or otherwise special-purpose address.
///
/// IPv4 ranges rejected: loopback, RFC 1918 private, link-local
/// (169.254.0.0/16, which includes the 169.254.169.254 metadata address),
/// broadcast, unspecified, documentation ranges, carrier-grade NAT
/// (100.64.0.0/10) and IETF protocol assignments (192.0.0.0/24).
///
/// IPv6 ranges rejected: loopback, unspecified, unique-local (fc00::/7) and
/// link-local (fe80::/10). IPv4-mapped addresses (`::ffff:a.b.c.d`) are
/// judged by the IPv4 rules for the embedded address, so `::ffff:127.0.0.1`
/// cannot be used to sidestep the check.
fn is_internal_ip(ip: IpAddr) -> bool {
    // IPv4 rules, shared by plain IPv4 and IPv4-mapped IPv6 addresses.
    fn is_internal_v4(v4: std::net::Ipv4Addr) -> bool {
        let octets = v4.octets();

        // 100.64.0.0/10
        let carrier_grade_nat = octets[0] == 100 && (64..=127).contains(&octets[1]);
        // 192.0.0.0/24 only: the rest of 192.0.0.0/16 is ordinary public space.
        let ietf_protocol_assignments = octets[..3] == [192, 0, 0];

        v4.is_loopback()
            || v4.is_private()
            || v4.is_link_local()
            || v4.is_broadcast()
            || v4.is_unspecified()
            || v4.is_documentation()
            || carrier_grade_nat
            || ietf_protocol_assignments
    }

    match ip {
        IpAddr::V4(v4) => is_internal_v4(v4),
        IpAddr::V6(v6) => {
            if let Some(mapped) = v6.to_ipv4_mapped() {
                return is_internal_v4(mapped);
            }

            let first_segment = v6.segments()[0];
            let unique_local = (first_segment & 0xfe00) == 0xfc00;
            let link_local = (first_segment & 0xffc0) == 0xfe80;

            v6.is_loopback() || v6.is_unspecified() || unique_local || link_local
        }
    }
}
266
267 pub async fn scan(&self) -> Result<ScanResult> {
269 let homepage_html = self.fetch_page(&self.base_url).await?;
271 let document = Html::parse_document(&homepage_html);
272
273 let wordpress_version = self.detect_wp_version(&document).await;
275
276 let wordpress_detected = wordpress_version.is_some()
278 || self.detect_wp_from_rest_api().await.is_some()
279 || self.detect_wp_from_cookies().await.is_some();
280
281 let wordpress_latest = self.fetch_wp_latest_version().await;
283
284 let theme = self.detect_theme(&document).await;
286
287 let plugins = self.detect_plugins(&document).await;
289
290 Ok(ScanResult {
291 url: self.base_url.clone(),
292 wordpress_detected,
293 wordpress_version,
294 wordpress_latest,
295 theme,
296 plugins,
297 })
298 }
299
300 async fn fetch_wp_latest_version(&self) -> Option<String> {
302 let url = format!("{}/core/version-check/1.7/", WP_API_BASE);
303 let response: WpVersionResponse =
304 self.client.get(&url).send().await.ok()?.json().await.ok()?;
305 response.offers.first().map(|o| o.version.clone())
306 }
307
308 async fn fetch_plugin_latest_version(&self, slug: &str) -> Option<String> {
310 let url = format!(
311 "{}/plugins/info/1.2/?action=plugin_information&slug={}",
312 WP_API_BASE, slug
313 );
314 let response: PluginApiResponse =
315 self.client.get(&url).send().await.ok()?.json().await.ok()?;
316 response.version
317 }
318
319 async fn fetch_theme_latest_version(&self, slug: &str) -> Option<String> {
321 let url = format!(
322 "{}/themes/info/1.2/?action=theme_information&slug={}",
323 WP_API_BASE, slug
324 );
325 let response: ThemeApiResponse =
326 self.client.get(&url).send().await.ok()?.json().await.ok()?;
327 response.version
328 }
329
330 async fn fetch_page(&self, url: &Url) -> Result<String> {
332 let response = self
333 .client
334 .get(url.as_str())
335 .send()
336 .await
337 .map_err(|e| Error::HttpRequest(e.to_string()))?;
338
339 if !response.status().is_success() {
340 return Err(Error::HttpStatus(response.status().as_u16()));
341 }
342
343 response
344 .text()
345 .await
346 .map_err(|e| Error::HttpRequest(e.to_string()))
347 }
348
349 async fn detect_wp_version(&self, document: &Html) -> Option<String> {
351 if let Some(version) = self.detect_version_from_meta(document) {
353 return Some(version);
354 }
355
356 if let Some(version) = self.detect_version_from_feed().await {
358 return Some(version);
359 }
360
361 self.detect_version_from_readme().await
363 }
364
365 fn detect_version_from_meta(&self, document: &Html) -> Option<String> {
367 let selector = Selector::parse("meta[name='generator']").ok()?;
368
369 for element in document.select(&selector) {
370 if let Some(content) = element.value().attr("content")
371 && content.starts_with("WordPress")
372 {
373 let version = content.strip_prefix("WordPress ")?.trim();
375 if !version.is_empty() {
376 return Some(version.to_string());
377 }
378 }
379 }
380 None
381 }
382
383 async fn detect_version_from_feed(&self) -> Option<String> {
385 let feed_url = self.base_url.join(WP_FEED_PATH).ok()?;
386 let html = self.fetch_page(&feed_url).await.ok()?;
387
388 let re = Regex::new(r"wordpress\.org/\?v=([0-9.]+)").ok()?;
390 re.captures(&html)?.get(1).map(|m| m.as_str().to_string())
391 }
392
393 async fn detect_version_from_readme(&self) -> Option<String> {
395 let readme_url = self.base_url.join(WP_README_PATH).ok()?;
396 let html = self.fetch_page(&readme_url).await.ok()?;
397
398 let re = Regex::new(r"Version\s+([0-9.]+)").ok()?;
400 re.captures(&html)?.get(1).map(|m| m.as_str().to_string())
401 }
402
403 async fn detect_wp_from_rest_api(&self) -> Option<()> {
405 let api_url = self.base_url.join(WP_JSON_PATH).ok()?;
406
407 let response = self.client.get(api_url.as_str()).send().await.ok()?;
408
409 if !response.status().is_success() {
410 return None;
411 }
412
413 let api_response: WpJsonResponse = response.json().await.ok()?;
415
416 if let Some(namespaces) = &api_response.namespaces
418 && namespaces.iter().any(|ns| ns.starts_with("wp/"))
419 {
420 return Some(());
421 }
422
423 if api_response.name.is_some() || api_response.url.is_some() {
425 return Some(());
426 }
427
428 None
429 }
430
431 async fn detect_wp_from_cookies(&self) -> Option<()> {
433 let response = self.client.get(self.base_url.as_str()).send().await.ok()?;
434
435 for cookie in response.cookies() {
437 let name = cookie.name();
438 let is_wp_cookie =
439 WP_COOKIE_PREFIXES.iter().any(|p| name.starts_with(p)) || name == WP_LANG_COOKIE;
440 if is_wp_cookie {
441 return Some(());
442 }
443 }
444
445 if let Some(set_cookie) = response.headers().get("set-cookie")
447 && let Ok(cookie_str) = set_cookie.to_str()
448 && WP_COOKIE_PREFIXES.iter().any(|p| cookie_str.contains(p))
449 {
450 return Some(());
451 }
452
453 None
454 }
455
456 async fn detect_theme(&self, document: &Html) -> Option<ThemeInfo> {
458 let link_selector = Selector::parse("link[rel='stylesheet']").ok()?;
460
461 for element in document.select(&link_selector) {
462 if let Some(href) = element.value().attr("href")
463 && let Some(mut theme) = self.extract_theme_from_url(href)
464 {
465 theme.latest_version = self.fetch_theme_latest_version(&theme.slug).await;
467 return Some(theme);
468 }
469 }
470
471 let style_re = Regex::new(r"/wp-content/themes/([^/]+)/").ok()?;
473
474 let html = document.html();
475 if let Some(caps) = style_re.captures(&html) {
476 let slug = caps.get(1)?.as_str().to_string();
477 let latest_version = self.fetch_theme_latest_version(&slug).await;
478 return Some(ThemeInfo {
479 slug,
480 version: None,
481 latest_version,
482 });
483 }
484
485 None
486 }
487
488 fn extract_theme_from_url(&self, url: &str) -> Option<ThemeInfo> {
490 let re = Regex::new(r"/wp-content/themes/([^/]+)/").ok()?;
492 let caps = re.captures(url)?;
493 let slug = caps.get(1)?.as_str().to_string();
494
495 let version = if let Some(v_pos) = url.find("ver=") {
497 let v_start = v_pos + 4;
498 let v_end = url[v_start..]
499 .find(|c: char| !c.is_ascii_alphanumeric() && c != '.' && c != '-' && c != '_')
500 .map(|i| v_start + i)
501 .unwrap_or(url.len());
502 let raw_version = url[v_start..v_end].to_string();
503 Some(Self::normalize_version(&raw_version))
504 } else {
505 None
506 };
507
508 Some(ThemeInfo {
509 slug,
510 version,
511 latest_version: None,
512 })
513 }
514
515 async fn detect_plugins(&self, document: &Html) -> Vec<PluginInfo> {
517 let mut plugin_slugs = HashSet::new();
518 let html = document.html();
519
520 let plugin_re = Regex::new(r"/wp-content/(?:mu-)?plugins/([a-zA-Z0-9_-]+)/").unwrap();
522
523 for caps in plugin_re.captures_iter(&html) {
524 if let Some(slug) = caps.get(1) {
525 let slug_str = slug.as_str().to_string();
526 if !SKIP_PLUGIN_SLUGS.contains(&slug_str.as_str()) {
527 plugin_slugs.insert(slug_str);
528 }
529 }
530 }
531
532 let mut plugins = Vec::new();
534 for slug in plugin_slugs {
535 let version = self.find_plugin_version(&html, &slug);
536 let latest_version = self.fetch_plugin_latest_version(&slug).await;
537 plugins.push(PluginInfo {
538 slug,
539 version,
540 latest_version,
541 });
542 }
543 plugins
544 }
545
546 fn find_plugin_version(&self, html: &str, slug: &str) -> Option<String> {
548 let pattern = format!(
550 r#"/wp-content/(?:mu-)?plugins/{}/[^'"]*\?[^'"]*ver=([0-9a-zA-Z._-]+)"#,
551 regex::escape(slug)
552 );
553 let re = Regex::new(&pattern).ok()?;
554 let caps = re.captures(html)?;
555 let version = caps.get(1)?.as_str().to_string();
556
557 Some(Self::normalize_version(&version))
559 }
560
/// Rewrites version strings that are really cache-busting tokens.
///
/// * Ten ASCII digits starting with `1` or `2` become `(timestamp:<value>)`.
/// * Seven or more hex digits containing at least one letter become
///   `(hash:<first seven characters>)`.
/// * Anything else is returned unchanged.
fn normalize_version(version: &str) -> String {
    let len = version.len();
    let all_digits = version.chars().all(|c| c.is_ascii_digit());

    let looks_like_timestamp = len == 10 && all_digits && version.starts_with(['1', '2']);
    if looks_like_timestamp {
        return format!("(timestamp:{})", version);
    }

    let all_hex = version.chars().all(|c| c.is_ascii_hexdigit());
    let looks_like_hash = len >= 7 && all_hex && !all_digits;
    if looks_like_hash {
        // All characters are ASCII hex digits here, so byte 7 is a char boundary.
        let short = &version[..7];
        return format!("(hash:{})", short);
    }

    version.to_string()
}
586}
587
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::Ipv4Addr;

    /// Wraps four octets as an `IpAddr`.
    fn v4(octets: [u8; 4]) -> IpAddr {
        IpAddr::V4(Ipv4Addr::from(octets))
    }

    /// Builds a scanner for `target`, requires that construction fails, and
    /// returns the error text.
    fn rejection_message(target: &str) -> String {
        Scanner::new(target)
            .expect_err("scanner construction should have been rejected")
            .to_string()
    }

    #[test]
    fn parse_valid_url() {
        assert!(Scanner::new("https://example.com").is_ok());
    }

    #[test]
    fn parse_invalid_url() {
        assert!(Scanner::new("not a url").is_err());
    }

    #[test]
    fn reject_localhost() {
        assert!(rejection_message("http://localhost").contains("localhost"));
    }

    #[test]
    fn reject_localhost_subdomain() {
        assert!(Scanner::new("http://foo.localhost").is_err());
    }

    #[test]
    fn reject_file_scheme() {
        assert!(rejection_message("file:///etc/passwd").contains("scheme"));
    }

    #[test]
    fn reject_ftp_scheme() {
        assert!(rejection_message("ftp://example.com").contains("scheme"));
    }

    #[test]
    fn internal_ip_detection() {
        // Private ranges, loopback and link-local.
        let internal = [
            [10, 0, 0, 1],
            [172, 16, 0, 1],
            [192, 168, 1, 1],
            [127, 0, 0, 1],
            [169, 254, 1, 1],
        ];
        for octets in internal {
            assert!(
                Scanner::is_internal_ip(v4(octets)),
                "{:?} should be internal",
                octets
            );
        }

        let public = [[8, 8, 8, 8], [93, 184, 216, 34]];
        for octets in public {
            assert!(
                !Scanner::is_internal_ip(v4(octets)),
                "{:?} should be public",
                octets
            );
        }
    }

    #[test]
    fn normalize_semantic_version() {
        for version in ["1.2.3", "22.0.0", "7.0-alpha"] {
            assert_eq!(Scanner::normalize_version(version), version);
        }
    }

    #[test]
    fn normalize_timestamp_version() {
        let cases = [
            ("1748271784", "(timestamp:1748271784)"),
            ("1748268723", "(timestamp:1748268723)"),
        ];
        for (input, expected) in cases {
            assert_eq!(Scanner::normalize_version(input), expected);
        }
    }

    #[test]
    fn normalize_hash_version() {
        let cases = [
            ("569ab5664387d06c16a234c9771d3d57fb15720a", "(hash:569ab56)"),
            ("abcdef1", "(hash:abcdef1)"),
        ];
        for (input, expected) in cases {
            assert_eq!(Scanner::normalize_version(input), expected);
        }
    }

    #[test]
    fn normalize_date_version() {
        // Eight digits is a date-like version, not a timestamp or hash.
        assert_eq!(Scanner::normalize_version("20200121"), "20200121");
    }
}