use crate::error::{Error, Result};
use regex::Regex;
use reqwest::Client;
use scraper::{Html, Selector};
use serde::Deserialize;
use std::collections::HashSet;
use std::net::{IpAddr, ToSocketAddrs};
use std::time::Duration;
use url::Url;
/// `User-Agent` header value installed on the HTTP client built in `Scanner::build_internal`.
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
/// Timeout, in seconds, passed to the HTTP client builder.
const TIMEOUT_SECS: u64 = 30;
/// Base URL of the wordpress.org API used for the "latest version" lookups.
const WP_API_BASE: &str = "https://api.wordpress.org";
/// Path joined onto the scanned site's URL to probe the REST API index.
const WP_JSON_PATH: &str = "/wp-json/";
/// Path joined onto the scanned site's URL to fetch the feed.
const WP_FEED_PATH: &str = "/feed/";
/// Path joined onto the scanned site's URL to fetch the bundled readme page.
const WP_README_PATH: &str = "/readme.html";
/// Cookie-name prefixes treated as evidence of WordPress by `detect_wp_from_cookies`.
const WP_COOKIE_PREFIXES: &[&str] = &["wordpress_", "wp-"];
/// Exact cookie name that is also treated as evidence of WordPress.
const WP_LANG_COOKIE: &str = "wp_lang";
/// Directory names under the plugins path that `detect_plugins` never reports as plugins.
const SKIP_PLUGIN_SLUGS: &[&str] = &["index", "cache"];
/// URL schemes accepted by `Scanner::build_internal`; anything else is rejected.
const ALLOWED_SCHEMES: &[&str] = &["http", "https"];
/// Everything collected by one call to `Scanner::scan`.
#[derive(Debug, Clone)]
pub struct ScanResult {
/// Base URL the scanner was built with.
pub url: Url,
/// `true` when a version was found, or the REST API probe or the cookie probe matched.
pub wordpress_detected: bool,
/// Version reported by the site (generator meta tag, feed, or readme), if any.
pub wordpress_version: Option<String>,
/// First version offered by the wordpress.org version-check endpoint, if the lookup succeeded.
pub wordpress_latest: Option<String>,
/// Theme found in the homepage markup, if any.
pub theme: Option<ThemeInfo>,
/// Plugins found in the homepage markup.
pub plugins: Vec<PluginInfo>,
}
/// A theme discovered through a `/wp-content/themes/<slug>/` path.
#[derive(Debug, Clone)]
pub struct ThemeInfo {
/// Directory name captured from the theme path.
pub slug: String,
/// Value of the asset's `ver=` parameter after `Scanner::normalize_version`, when present.
pub version: Option<String>,
/// Version reported by the wordpress.org theme API, when the lookup succeeded.
pub latest_version: Option<String>,
}
/// A plugin discovered through a `/wp-content/plugins/<slug>/` (or `mu-plugins`) path.
#[derive(Debug, Clone)]
pub struct PluginInfo {
/// Directory name captured from the plugin path.
pub slug: String,
/// Value of a plugin asset's `ver=` parameter after `Scanner::normalize_version`, when found.
pub version: Option<String>,
/// Version reported by the wordpress.org plugin API, when the lookup succeeded.
pub latest_version: Option<String>,
}
/// The part of the plugin-information response that `fetch_plugin_latest_version` reads.
#[derive(Debug, Deserialize)]
struct PluginApiResponse {
/// Version string from the response, if the field was present.
version: Option<String>,
}
/// The part of the theme-information response that `fetch_theme_latest_version` reads.
#[derive(Debug, Deserialize)]
struct ThemeApiResponse {
/// Version string from the response, if the field was present.
version: Option<String>,
}
/// The part of the core version-check response that `fetch_wp_latest_version` reads.
#[derive(Debug, Deserialize)]
struct WpVersionResponse {
/// Offered releases; only the first entry is used.
offers: Vec<WpVersionOffer>,
}
/// One entry of `WpVersionResponse::offers`.
#[derive(Debug, Deserialize)]
struct WpVersionOffer {
/// Version string of the offered release.
version: String,
}
/// The fields of the `/wp-json/` index that `detect_wp_from_rest_api` inspects.
///
/// Every field is optional so that any JSON object deserializes; detection is
/// decided by which fields are present.
#[derive(Debug, Deserialize)]
struct WpJsonResponse {
/// Present-or-absent is all that is checked.
name: Option<String>,
/// Present-or-absent is all that is checked.
url: Option<String>,
/// Route namespaces; an entry starting with `wp/` counts as a match.
namespaces: Option<Vec<String>>,
}
/// Scans a single site for WordPress, its theme and its plugins.
///
/// Construct with `Scanner::new` or `Scanner::builder`, then call `scan`.
#[derive(Debug)]
pub struct Scanner {
/// HTTP client used for every request, including the wordpress.org lookups.
client: Client,
/// Parsed target URL; site-relative probe paths are joined onto it.
base_url: Url,
}
/// Builder for `Scanner` that allows opting out of the private-host check.
#[derive(Debug)]
pub struct ScannerBuilder {
/// Target URL exactly as supplied by the caller; parsed only in `build`.
url: String,
/// When `true`, `Scanner::validate_host` is skipped.
allow_private: bool,
}
impl ScannerBuilder {
    /// Starts a builder for `url` with private/internal targets disallowed.
    ///
    /// The URL is stored verbatim; it is not parsed or validated until
    /// [`ScannerBuilder::build`] runs.
    pub fn new(url: &str) -> Self {
        let url = String::from(url);
        Self {
            url,
            allow_private: false,
        }
    }

    /// Chooses whether the host safety check is skipped (`true`) or enforced (`false`).
    pub fn allow_private(self, allow: bool) -> Self {
        Self {
            allow_private: allow,
            ..self
        }
    }

    /// Consumes the builder and constructs the [`Scanner`].
    ///
    /// # Errors
    ///
    /// Returns whatever `Scanner::build_internal` returns for the stored URL
    /// and `allow_private` setting.
    pub fn build(self) -> Result<Scanner> {
        let Self { url, allow_private } = self;
        Scanner::build_internal(&url, allow_private)
    }
}
impl Scanner {
/// Creates a scanner for `url` with the private-host check enabled.
///
/// # Errors
///
/// Propagates any error from `build_internal` (invalid URL, disallowed
/// scheme, rejected host, or HTTP client construction failure).
pub fn new(url: &str) -> Result<Self> {
Self::build_internal(url, false)
}
/// Returns a `ScannerBuilder` for `url`, for callers that need to change
/// defaults such as `allow_private` before building.
pub fn builder(url: &str) -> ScannerBuilder {
ScannerBuilder::new(url)
}
/// Shared constructor behind `Scanner::new` and `ScannerBuilder::build`.
///
/// Steps, in order:
/// 1. Prefix `https://` when the input does not start with a scheme.
/// 2. Parse the URL and require an `http` or `https` scheme.
/// 3. Unless `allow_private` is set, run `validate_host` on the parsed URL.
/// 4. Build the HTTP client with the fixed user agent and timeout.
///
/// # Errors
///
/// * `Error::InvalidUrl` if parsing fails, the scheme is not allowed, or the
///   host is rejected by `validate_host`.
/// * `Error::HttpClient` if the HTTP client cannot be constructed.
///
/// NOTE(review): no redirect policy is configured here, so the client's
/// default redirect handling applies and redirect targets are not passed
/// through `validate_host` — confirm whether that is acceptable.
fn build_internal(url: &str, allow_private: bool) -> Result<Self> {
    let url = url.trim();
    // Only treat the input as already carrying a scheme when the text before
    // the first "://" is syntactically a scheme. A plain `contains("://")`
    // is fooled by scheme-less inputs whose query embeds another URL, e.g.
    // `example.com/?next=https://other.example`.
    let has_scheme = url.split_once("://").is_some_and(|(scheme, _)| {
        scheme.starts_with(|c: char| c.is_ascii_alphabetic())
            && scheme
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
    });
    let url_with_scheme = if has_scheme {
        url.to_string()
    } else {
        format!("https://{}", url)
    };
    let base_url =
        Url::parse(&url_with_scheme).map_err(|e| Error::InvalidUrl(e.to_string()))?;
    if !ALLOWED_SCHEMES.contains(&base_url.scheme()) {
        return Err(Error::InvalidUrl(format!(
            "scheme '{}' not allowed (use http or https)",
            base_url.scheme()
        )));
    }
    if !allow_private {
        Self::validate_host(&base_url)?;
    }
    let client = Client::builder()
        .user_agent(USER_AGENT)
        .timeout(Duration::from_secs(TIMEOUT_SECS))
        // Explicitly keep TLS certificate verification on.
        .danger_accept_invalid_certs(false)
        .build()
        .map_err(|e| Error::HttpClient(e.to_string()))?;
    Ok(Self { client, base_url })
}
fn validate_host(url: &Url) -> Result<()> {
let host = url
.host_str()
.ok_or_else(|| Error::InvalidUrl("missing host".to_string()))?;
if host == "localhost" || host.ends_with(".localhost") {
return Err(Error::InvalidUrl("localhost not allowed".to_string()));
}
let port = url
.port()
.unwrap_or(if url.scheme() == "https" { 443 } else { 80 });
let socket_addr = format!("{}:{}", host, port);
if let Ok(addrs) = socket_addr.to_socket_addrs() {
for addr in addrs {
if Self::is_internal_ip(addr.ip()) {
return Err(Error::InvalidUrl(format!(
"internal/private IP address not allowed: {}",
addr.ip()
)));
}
}
}
Ok(())
}
/// Returns `true` when `ip` must not be contacted by a scanner that is
/// restricted to public hosts.
///
/// IPv4 addresses are internal when they are loopback, RFC 1918 private,
/// link-local (169.254.0.0/16, which contains the 169.254.169.254 metadata
/// address), broadcast, unspecified, documentation ranges, carrier-grade NAT
/// (100.64.0.0/10), or IETF protocol assignments (192.0.0.0/24).
///
/// IPv6 addresses are internal when they are loopback, unspecified, unique
/// local (fc00::/7) or link-local (fe80::/10). IPv4-mapped IPv6 addresses
/// (`::ffff:a.b.c.d`) are judged by their embedded IPv4 address, so they
/// cannot be used to slip past the IPv4 rules.
fn is_internal_ip(ip: IpAddr) -> bool {
    /// IPv4 rules, shared by plain IPv4 and IPv4-mapped IPv6 input.
    fn is_internal_v4(ipv4: std::net::Ipv4Addr) -> bool {
        let [a, b, c, _] = ipv4.octets();
        ipv4.is_loopback()
            || ipv4.is_private()
            || ipv4.is_link_local()
            || ipv4.is_broadcast()
            || ipv4.is_unspecified()
            || ipv4.is_documentation()
            // Carrier-grade NAT, 100.64.0.0/10.
            || (a == 100 && (64..=127).contains(&b))
            // IETF protocol assignments, 192.0.0.0/24 only. The rest of
            // 192.0.0.0/16 is ordinary public address space and must stay
            // scannable.
            || (a == 192 && b == 0 && c == 0)
    }

    match ip {
        IpAddr::V4(ipv4) => is_internal_v4(ipv4),
        IpAddr::V6(ipv6) => {
            if let Some(mapped) = ipv6.to_ipv4_mapped() {
                return is_internal_v4(mapped);
            }
            let first_segment = ipv6.segments()[0];
            ipv6.is_loopback()
                || ipv6.is_unspecified()
                // Unique local, fc00::/7.
                || (first_segment & 0xfe00) == 0xfc00
                // Link-local, fe80::/10.
                || (first_segment & 0xffc0) == 0xfe80
        }
    }
}
/// Runs the full scan against the configured base URL.
///
/// The homepage is fetched and parsed once. Version detection runs first;
/// the REST API probe and then the cookie probe are only tried when no
/// version was found. The wordpress.org "latest" lookup, theme detection
/// and plugin detection follow, in that order.
///
/// # Errors
///
/// Fails only when the homepage itself cannot be fetched; every later probe
/// degrades to `None` or an empty list.
pub async fn scan(&self) -> Result<ScanResult> {
    let document = {
        let body = self.fetch_page(&self.base_url).await?;
        Html::parse_document(&body)
    };

    let wordpress_version = self.detect_wp_version(&document).await;
    let wordpress_detected = match wordpress_version {
        Some(_) => true,
        // Fallback probes are evaluated lazily, REST API before cookies.
        None => {
            self.detect_wp_from_rest_api().await.is_some()
                || self.detect_wp_from_cookies().await.is_some()
        }
    };

    let wordpress_latest = self.fetch_wp_latest_version().await;
    let theme = self.detect_theme(&document).await;
    let plugins = self.detect_plugins(&document).await;

    let url = self.base_url.clone();
    Ok(ScanResult {
        url,
        wordpress_detected,
        wordpress_version,
        wordpress_latest,
        theme,
        plugins,
    })
}
/// Asks the wordpress.org version-check endpoint for the current core release.
///
/// Returns the version of the first offer, or `None` when the request
/// fails, the body does not deserialize, or no offers are listed.
async fn fetch_wp_latest_version(&self) -> Option<String> {
    let endpoint = format!("{}/core/version-check/1.7/", WP_API_BASE);
    let reply = self.client.get(&endpoint).send().await.ok()?;
    let parsed: WpVersionResponse = reply.json().await.ok()?;
    parsed.offers.into_iter().next().map(|offer| offer.version)
}
async fn fetch_plugin_latest_version(&self, slug: &str) -> Option<String> {
let url = format!(
"{}/plugins/info/1.2/?action=plugin_information&slug={}",
WP_API_BASE, slug
);
let response: PluginApiResponse =
self.client.get(&url).send().await.ok()?.json().await.ok()?;
response.version
}
async fn fetch_theme_latest_version(&self, slug: &str) -> Option<String> {
let url = format!(
"{}/themes/info/1.2/?action=theme_information&slug={}",
WP_API_BASE, slug
);
let response: ThemeApiResponse =
self.client.get(&url).send().await.ok()?.json().await.ok()?;
response.version
}
async fn fetch_page(&self, url: &Url) -> Result<String> {
let response = self
.client
.get(url.as_str())
.send()
.await
.map_err(|e| Error::HttpRequest(e.to_string()))?;
if !response.status().is_success() {
return Err(Error::HttpStatus(response.status().as_u16()));
}
response
.text()
.await
.map_err(|e| Error::HttpRequest(e.to_string()))
}
/// Determines the site's WordPress version, trying the cheapest source first.
///
/// Order: generator meta tag in the already-parsed homepage, then the feed,
/// then the readme page. The two network probes run only if the previous
/// source produced nothing.
async fn detect_wp_version(&self, document: &Html) -> Option<String> {
    let from_meta = self.detect_version_from_meta(document);
    if from_meta.is_some() {
        return from_meta;
    }

    let from_feed = self.detect_version_from_feed().await;
    if from_feed.is_some() {
        return from_feed;
    }

    self.detect_version_from_readme().await
}
/// Extracts the WordPress version from a `<meta name="generator">` tag.
///
/// Every generator tag is examined. A tag counts only when its `content`
/// starts with `"WordPress "` and a non-empty version follows. Tags that
/// merely start with `WordPress` (for example `WordPress.com`) or that
/// belong to other software are skipped, so a later matching tag is still
/// found instead of the search being abandoned.
fn detect_version_from_meta(&self, document: &Html) -> Option<String> {
    let selector = Selector::parse("meta[name='generator']").ok()?;
    document
        .select(&selector)
        .filter_map(|element| element.value().attr("content"))
        .filter_map(|content| content.strip_prefix("WordPress "))
        .map(str::trim)
        .find(|version| !version.is_empty())
        .map(|version| version.to_string())
}
/// Reads the version from the `wordpress.org/?v=<version>` marker in the feed.
///
/// `WP_FEED_PATH` starts with `/`, so the feed is requested relative to the
/// host root of `base_url`. Any failure along the way (URL join, fetch,
/// no match) yields `None`.
async fn detect_version_from_feed(&self) -> Option<String> {
    let feed_url = self.base_url.join(WP_FEED_PATH).ok()?;
    let body = self.fetch_page(&feed_url).await.ok()?;
    let generator_re = Regex::new(r"wordpress\.org/\?v=([0-9.]+)").ok()?;
    let captures = generator_re.captures(&body)?;
    let version = captures.get(1)?;
    Some(version.as_str().to_string())
}
/// Reads the version from the first `Version <digits and dots>` text in the readme page.
///
/// `WP_README_PATH` starts with `/`, so the page is requested relative to
/// the host root of `base_url`. Any failure along the way (URL join, fetch,
/// no match) yields `None`.
async fn detect_version_from_readme(&self) -> Option<String> {
    let readme_url = self.base_url.join(WP_README_PATH).ok()?;
    let body = self.fetch_page(&readme_url).await.ok()?;
    let version_re = Regex::new(r"Version\s+([0-9.]+)").ok()?;
    let captures = version_re.captures(&body)?;
    let version = captures.get(1)?;
    Some(version.as_str().to_string())
}
/// Probes the REST API index for signs of WordPress.
///
/// Returns `Some(())` when the endpoint answers with a success status and
/// JSON that either lists a namespace starting with `wp/` or carries a
/// `name` or `url` field. Returns `None` on any request, status or
/// decoding failure, or when none of those markers are present.
async fn detect_wp_from_rest_api(&self) -> Option<()> {
    let api_url = self.base_url.join(WP_JSON_PATH).ok()?;
    let response = self.client.get(api_url.as_str()).send().await.ok()?;
    if !response.status().is_success() {
        return None;
    }

    let index: WpJsonResponse = response.json().await.ok()?;
    let has_wp_namespace = index
        .namespaces
        .as_deref()
        .is_some_and(|list| list.iter().any(|ns| ns.starts_with("wp/")));
    let has_site_identity = index.name.is_some() || index.url.is_some();

    (has_wp_namespace || has_site_identity).then_some(())
}
async fn detect_wp_from_cookies(&self) -> Option<()> {
let response = self.client.get(self.base_url.as_str()).send().await.ok()?;
for cookie in response.cookies() {
let name = cookie.name();
let is_wp_cookie =
WP_COOKIE_PREFIXES.iter().any(|p| name.starts_with(p)) || name == WP_LANG_COOKIE;
if is_wp_cookie {
return Some(());
}
}
if let Some(set_cookie) = response.headers().get("set-cookie")
&& let Ok(cookie_str) = set_cookie.to_str()
&& WP_COOKIE_PREFIXES.iter().any(|p| cookie_str.contains(p))
{
return Some(());
}
None
}
/// Identifies the active theme from the homepage markup.
///
/// Stylesheet `<link>` elements are checked first, in document order; the
/// first `href` that points into a theme directory wins and may also supply
/// the installed version. If no stylesheet matches, the serialized HTML is
/// searched for any theme path, in which case the installed version is
/// unknown. Either way the latest version is then looked up on wordpress.org.
async fn detect_theme(&self, document: &Html) -> Option<ThemeInfo> {
    let stylesheet_links = Selector::parse("link[rel='stylesheet']").ok()?;
    let from_stylesheet = document
        .select(&stylesheet_links)
        .filter_map(|link| link.value().attr("href"))
        .find_map(|href| self.extract_theme_from_url(href));

    if let Some(found) = from_stylesheet {
        let latest_version = self.fetch_theme_latest_version(&found.slug).await;
        return Some(ThemeInfo {
            latest_version,
            ..found
        });
    }

    // Fallback: any reference to a theme directory anywhere in the page.
    let theme_path_re = Regex::new(r"/wp-content/themes/([^/]+)/").ok()?;
    let markup = document.html();
    let slug = theme_path_re
        .captures(&markup)?
        .get(1)?
        .as_str()
        .to_string();
    let latest_version = self.fetch_theme_latest_version(&slug).await;
    Some(ThemeInfo {
        slug,
        version: None,
        latest_version,
    })
}
/// Parses a theme asset URL into a `ThemeInfo` (without `latest_version`).
///
/// Returns `None` when `url` does not contain a `/wp-content/themes/<slug>/`
/// segment. The version is taken from a `ver` query parameter: it must be
/// introduced by `?` or `&` and have at least one character from
/// `[0-9A-Za-z._-]`. This prevents parameters that merely end in `ver`
/// (such as `server=`) from being mistaken for it, and an empty `ver=` is
/// reported as no version rather than an empty string. The captured value
/// is passed through `normalize_version`.
fn extract_theme_from_url(&self, url: &str) -> Option<ThemeInfo> {
    let slug_re = Regex::new(r"/wp-content/themes/([^/]+)/").ok()?;
    let slug = slug_re.captures(url)?.get(1)?.as_str().to_string();

    let version_re = Regex::new(r"[?&]ver=([0-9A-Za-z._-]+)").ok()?;
    let version = version_re
        .captures(url)
        .and_then(|caps| caps.get(1))
        .map(|m| Self::normalize_version(m.as_str()));

    Some(ThemeInfo {
        slug,
        version,
        latest_version: None,
    })
}
/// Lists the plugins referenced anywhere in the homepage markup.
///
/// Every `/wp-content/plugins/<slug>/` or `/wp-content/mu-plugins/<slug>/`
/// path contributes its slug once; slugs in `SKIP_PLUGIN_SLUGS` are ignored.
/// For each slug the installed version is read from the page and the latest
/// version is looked up on wordpress.org, one request at a time.
///
/// The result is sorted by slug so that repeated scans of the same page
/// produce the same ordering.
async fn detect_plugins(&self, document: &Html) -> Vec<PluginInfo> {
    let html = document.html();
    let plugin_re = Regex::new(r"/wp-content/(?:mu-)?plugins/([a-zA-Z0-9_-]+)/")
        .expect("plugin path pattern is a constant, valid regex");

    // De-duplicate through a set, then sort for a stable output order.
    let unique_slugs: HashSet<&str> = plugin_re
        .captures_iter(&html)
        .filter_map(|caps| caps.get(1))
        .map(|m| m.as_str())
        .filter(|slug| !SKIP_PLUGIN_SLUGS.contains(slug))
        .collect();
    let mut slugs: Vec<&str> = unique_slugs.into_iter().collect();
    slugs.sort_unstable();

    let mut plugins = Vec::with_capacity(slugs.len());
    for slug in slugs {
        let version = self.find_plugin_version(&html, slug);
        let latest_version = self.fetch_plugin_latest_version(slug).await;
        plugins.push(PluginInfo {
            slug: slug.to_string(),
            version,
            latest_version,
        });
    }
    plugins
}
/// Finds the installed version of plugin `slug` from an asset URL in `html`.
///
/// Looks for the first URL under the plugin's directory whose query string
/// (everything after `?`, up to the next quote) contains `ver=<value>`, and
/// returns that value after `normalize_version`. The slug is regex-escaped
/// before being placed in the pattern. Returns `None` when nothing matches.
fn find_plugin_version(&self, html: &str, slug: &str) -> Option<String> {
    let escaped_slug = regex::escape(slug);
    let pattern = format!(
        r#"/wp-content/(?:mu-)?plugins/{}/[^'"]*\?[^'"]*ver=([0-9a-zA-Z._-]+)"#,
        escaped_slug
    );
    let version_re = Regex::new(&pattern).ok()?;
    version_re
        .captures(html)
        .and_then(|caps| caps.get(1))
        .map(|m| Self::normalize_version(m.as_str()))
}
/// Rewrites cache-busting `ver=` values that are not real version numbers.
///
/// * Exactly ten ASCII digits starting with `1` or `2` are reported as
///   `(timestamp:<value>)`.
/// * Seven or more ASCII hex digits that are not all decimal digits are
///   reported as `(hash:<first seven characters>)`.
/// * Anything else is returned unchanged.
fn normalize_version(version: &str) -> String {
    let len = version.len();
    let all_digits = version.bytes().all(|b| b.is_ascii_digit());

    let leads_with_1_or_2 = matches!(version.as_bytes().first(), Some(b'1' | b'2'));
    if len == 10 && all_digits && leads_with_1_or_2 {
        return format!("(timestamp:{})", version);
    }

    let all_hex = version.bytes().all(|b| b.is_ascii_hexdigit());
    if len >= 7 && all_hex && !all_digits {
        // All bytes are ASCII here, so slicing at byte 7 is on a char boundary.
        return format!("(hash:{})", &version[..7]);
    }

    version.to_string()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::Ipv4Addr;

    /// Shorthand for building an IPv4 `IpAddr` from its four octets.
    fn v4(a: u8, b: u8, c: u8, d: u8) -> IpAddr {
        IpAddr::V4(Ipv4Addr::new(a, b, c, d))
    }

    /// Asserts that constructing a scanner for `input` fails with a message
    /// containing `needle`.
    fn assert_rejected_with(input: &str, needle: &str) {
        let result = Scanner::new(input);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains(needle));
    }

    #[test]
    fn parse_valid_url() {
        assert!(Scanner::new("https://example.com").is_ok());
    }

    #[test]
    fn parse_invalid_url() {
        assert!(Scanner::new("not a url").is_err());
    }

    #[test]
    fn reject_localhost() {
        assert_rejected_with("http://localhost", "localhost");
    }

    #[test]
    fn reject_localhost_subdomain() {
        assert!(Scanner::new("http://foo.localhost").is_err());
    }

    #[test]
    fn reject_file_scheme() {
        assert_rejected_with("file:///etc/passwd", "scheme");
    }

    #[test]
    fn reject_ftp_scheme() {
        assert_rejected_with("ftp://example.com", "scheme");
    }

    #[test]
    fn internal_ip_detection() {
        let internal = [
            v4(10, 0, 0, 1),
            v4(172, 16, 0, 1),
            v4(192, 168, 1, 1),
            v4(127, 0, 0, 1),
            v4(169, 254, 1, 1),
        ];
        for ip in internal {
            assert!(Scanner::is_internal_ip(ip));
        }

        let public = [v4(8, 8, 8, 8), v4(93, 184, 216, 34)];
        for ip in public {
            assert!(!Scanner::is_internal_ip(ip));
        }
    }

    #[test]
    fn normalize_semantic_version() {
        // Ordinary version strings pass through untouched.
        for version in ["1.2.3", "22.0.0", "7.0-alpha"] {
            assert_eq!(Scanner::normalize_version(version), version);
        }
    }

    #[test]
    fn normalize_timestamp_version() {
        let cases = [
            ("1748271784", "(timestamp:1748271784)"),
            ("1748268723", "(timestamp:1748268723)"),
        ];
        for (input, expected) in cases {
            assert_eq!(Scanner::normalize_version(input), expected);
        }
    }

    #[test]
    fn normalize_hash_version() {
        let cases = [
            ("569ab5664387d06c16a234c9771d3d57fb15720a", "(hash:569ab56)"),
            ("abcdef1", "(hash:abcdef1)"),
        ];
        for (input, expected) in cases {
            assert_eq!(Scanner::normalize_version(input), expected);
        }
    }

    #[test]
    fn normalize_date_version() {
        // Eight digits is a date-style version, not a ten-digit timestamp.
        assert_eq!(Scanner::normalize_version("20200121"), "20200121");
    }
}