wordpress_vulnerable_scanner/
scanner.rs

1//! WordPress website scanner
2//!
3//! Detects WordPress version, plugins, and themes by analyzing the website.
4
5use crate::error::{Error, Result};
6use regex::Regex;
7use reqwest::Client;
8use scraper::{Html, Selector};
9use serde::{Deserialize, Serialize};
10use std::collections::HashSet;
11use std::sync::LazyLock;
12use std::time::Duration;
13use url::Url;
14
15use crate::http::{TIMEOUT_SECS, USER_AGENT};
16
17// Pre-compiled regex patterns for performance
18static RE_WP_FEED_VERSION: LazyLock<Regex> =
19    LazyLock::new(|| Regex::new(r"wordpress\.org/\?v=([0-9.]+)").unwrap());
20static RE_WP_README_VERSION: LazyLock<Regex> =
21    LazyLock::new(|| Regex::new(r"Version\s+([0-9.]+)").unwrap());
22static RE_THEME_PATH: LazyLock<Regex> =
23    LazyLock::new(|| Regex::new(r"/wp-content/themes/([^/]+)/").unwrap());
24static RE_PLUGIN_PATH: LazyLock<Regex> =
25    LazyLock::new(|| Regex::new(r"/wp-content/plugins/([a-zA-Z0-9_-]+)/").unwrap());
26
27/// Detected component information
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct ComponentInfo {
30    /// Component type
31    pub component_type: ComponentType,
32    /// Component slug/identifier
33    pub slug: String,
34    /// Detected version (if found)
35    pub version: Option<String>,
36}
37
38/// Type of WordPress component
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
40#[serde(rename_all = "lowercase")]
41pub enum ComponentType {
42    /// WordPress core
43    Core,
44    /// Plugin
45    Plugin,
46    /// Theme
47    Theme,
48}
49
50impl std::fmt::Display for ComponentType {
51    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52        match self {
53            ComponentType::Core => write!(f, "core"),
54            ComponentType::Plugin => write!(f, "plugin"),
55            ComponentType::Theme => write!(f, "theme"),
56        }
57    }
58}
59
60/// Scan results from analyzing a WordPress site
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct ScanResult {
63    /// Target URL
64    pub url: String,
65    /// All detected components
66    pub components: Vec<ComponentInfo>,
67}
68
69impl ScanResult {
70    /// Create an empty scan result
71    pub fn empty(url: &str) -> Self {
72        Self {
73            url: url.to_string(),
74            components: Vec::new(),
75        }
76    }
77
78    /// Create from manually specified components
79    pub fn from_components(components: Vec<ComponentInfo>) -> Self {
80        Self {
81            url: String::new(),
82            components,
83        }
84    }
85
86    /// Get WordPress core component
87    pub fn core(&self) -> Option<&ComponentInfo> {
88        self.components
89            .iter()
90            .find(|c| c.component_type == ComponentType::Core)
91    }
92
93    /// Get all plugins
94    pub fn plugins(&self) -> impl Iterator<Item = &ComponentInfo> {
95        self.components
96            .iter()
97            .filter(|c| c.component_type == ComponentType::Plugin)
98    }
99
100    /// Get all themes
101    pub fn themes(&self) -> impl Iterator<Item = &ComponentInfo> {
102        self.components
103            .iter()
104            .filter(|c| c.component_type == ComponentType::Theme)
105    }
106}
107
108/// WordPress scanner
109pub struct Scanner {
110    client: Client,
111    base_url: Url,
112}
113
114impl Scanner {
115    /// Create a new scanner for the given URL
116    pub fn new(url: &str) -> Result<Self> {
117        let base_url = Url::parse(url).map_err(|e| Error::InvalidUrl(e.to_string()))?;
118
119        let client = Client::builder()
120            .user_agent(USER_AGENT)
121            .timeout(Duration::from_secs(TIMEOUT_SECS))
122            .danger_accept_invalid_certs(false)
123            .build()
124            .map_err(|e| Error::HttpClient(e.to_string()))?;
125
126        Ok(Self { client, base_url })
127    }
128
129    /// Scan the WordPress site
130    pub async fn scan(&self) -> Result<ScanResult> {
131        // Fetch homepage
132        let homepage_html = self.fetch_page(&self.base_url).await?;
133        let document = Html::parse_document(&homepage_html);
134
135        let mut components = Vec::new();
136
137        // Detect WordPress version
138        if let Some(version) = self.detect_wp_version(&document).await {
139            components.push(ComponentInfo {
140                component_type: ComponentType::Core,
141                slug: "wordpress".to_string(),
142                version: Some(version),
143            });
144        }
145
146        // Detect theme
147        if let Some(theme) = self.detect_theme(&document) {
148            components.push(theme);
149        }
150
151        // Detect plugins
152        let plugins = self.detect_plugins(&document);
153        components.extend(plugins);
154
155        Ok(ScanResult {
156            url: self.base_url.to_string(),
157            components,
158        })
159    }
160
161    /// Fetch a page and return its HTML
162    async fn fetch_page(&self, url: &Url) -> Result<String> {
163        let response = self
164            .client
165            .get(url.as_str())
166            .send()
167            .await
168            .map_err(|e| Error::HttpRequest(e.to_string()))?;
169
170        if !response.status().is_success() {
171            return Err(Error::HttpStatus(response.status().as_u16()));
172        }
173
174        response
175            .text()
176            .await
177            .map_err(|e| Error::HttpRequest(e.to_string()))
178    }
179
180    /// Detect WordPress version from various sources
181    async fn detect_wp_version(&self, document: &Html) -> Option<String> {
182        // Try meta generator tag first
183        if let Some(version) = self.detect_version_from_meta(document) {
184            return Some(version);
185        }
186
187        // Try RSS feed
188        if let Some(version) = self.detect_version_from_feed().await {
189            return Some(version);
190        }
191
192        // Try readme.html
193        if let Some(version) = self.detect_version_from_readme().await {
194            return Some(version);
195        }
196
197        None
198    }
199
200    /// Detect version from meta generator tag
201    fn detect_version_from_meta(&self, document: &Html) -> Option<String> {
202        let selector = Selector::parse("meta[name='generator']").ok()?;
203
204        for element in document.select(&selector) {
205            if let Some(content) = element.value().attr("content")
206                && content.starts_with("WordPress")
207            {
208                // Extract version from "WordPress X.Y.Z"
209                let version = content.strip_prefix("WordPress ")?.trim();
210                if !version.is_empty() {
211                    return Some(version.to_string());
212                }
213            }
214        }
215        None
216    }
217
218    /// Detect version from RSS feed
219    async fn detect_version_from_feed(&self) -> Option<String> {
220        let feed_url = self.base_url.join("/feed/").ok()?;
221
222        let html = self.fetch_page(&feed_url).await.ok()?;
223
224        // Look for <generator>https://wordpress.org/?v=X.Y.Z</generator>
225        let caps = RE_WP_FEED_VERSION.captures(&html)?;
226        Some(caps.get(1)?.as_str().to_string())
227    }
228
229    /// Detect version from readme.html
230    async fn detect_version_from_readme(&self) -> Option<String> {
231        let readme_url = self.base_url.join("/readme.html").ok()?;
232
233        let html = self.fetch_page(&readme_url).await.ok()?;
234
235        // Look for "Version X.Y.Z" in readme
236        let caps = RE_WP_README_VERSION.captures(&html)?;
237        Some(caps.get(1)?.as_str().to_string())
238    }
239
240    /// Detect the main theme
241    fn detect_theme(&self, document: &Html) -> Option<ComponentInfo> {
242        // Look for theme in stylesheet URLs
243        let link_selector = Selector::parse("link[rel='stylesheet']").ok()?;
244
245        for element in document.select(&link_selector) {
246            if let Some(href) = element.value().attr("href")
247                && let Some(info) = self.extract_theme_from_url(href)
248            {
249                return Some(info);
250            }
251        }
252
253        // Also check style tags and other sources
254        let html = document.html();
255        if let Some(caps) = RE_THEME_PATH.captures(&html) {
256            let slug = caps.get(1)?.as_str().to_string();
257            return Some(ComponentInfo {
258                component_type: ComponentType::Theme,
259                slug,
260                version: None,
261            });
262        }
263
264        None
265    }
266
267    /// Extract theme info from a URL
268    fn extract_theme_from_url(&self, url: &str) -> Option<ComponentInfo> {
269        // Match /wp-content/themes/theme-name/
270        let caps = RE_THEME_PATH.captures(url)?;
271        let slug = caps.get(1)?.as_str().to_string();
272
273        let version = extract_version_param(url);
274
275        Some(ComponentInfo {
276            component_type: ComponentType::Theme,
277            slug,
278            version,
279        })
280    }
281
282    /// Detect plugins from the page
283    fn detect_plugins(&self, document: &Html) -> Vec<ComponentInfo> {
284        let mut plugin_slugs = HashSet::new();
285        let html = document.html();
286
287        // Use pre-compiled regex to find plugin paths
288        for caps in RE_PLUGIN_PATH.captures_iter(&html) {
289            if let Some(slug) = caps.get(1) {
290                let slug_str = slug.as_str().to_string();
291                // Skip common non-plugin paths
292                if slug_str != "index" && slug_str != "cache" {
293                    plugin_slugs.insert(slug_str);
294                }
295            }
296        }
297
298        // Convert to ComponentInfo
299        plugin_slugs
300            .into_iter()
301            .map(|slug| {
302                let version = self.find_plugin_version(&html, &slug);
303                ComponentInfo {
304                    component_type: ComponentType::Plugin,
305                    slug,
306                    version,
307                }
308            })
309            .collect()
310    }
311
312    /// Find plugin version from HTML
313    fn find_plugin_version(&self, html: &str, slug: &str) -> Option<String> {
314        // Look for the plugin path and then extract ver= parameter
315        // This avoids compiling a new regex for each plugin
316        let plugin_path = format!("/wp-content/plugins/{}/", slug);
317
318        // Find all occurrences of this plugin path and check for version
319        for (pos, _) in html.match_indices(&plugin_path) {
320            // Look ahead in the URL for ver= parameter (within ~200 chars)
321            let search_end = (pos + 200).min(html.len());
322            let url_slice = &html[pos..search_end];
323
324            // Find the end of this URL (quote or space)
325            let url_end = url_slice
326                .find(['"', '\'', '>', ' '])
327                .unwrap_or(url_slice.len());
328            let url = &url_slice[..url_end];
329
330            // Extract version from this URL
331            if let Some(version) = extract_version_param(url) {
332                return Some(version);
333            }
334        }
335        None
336    }
337}
338
/// Version parameter prefix in URLs
const VERSION_PARAM: &str = "ver=";

/// Extract the version from a URL's `ver` query parameter (e.g. "?ver=1.2.3").
///
/// `ver=` is only recognised as a parameter of its own: at the very start of
/// the input or directly after `?`, `&` or `;` (the latter covers HTML-escaped
/// separators such as `&#038;` and `&amp;`). This keeps parameters whose name
/// merely ends in "ver" (`server=`, `driver=`) from being mistaken for it.
///
/// The value is the run of ASCII letters, digits and dots that follows.
/// Returns `None` when there is no such parameter or its value is empty.
fn extract_version_param(url: &str) -> Option<String> {
    url.match_indices(VERSION_PARAM).find_map(|(pos, _)| {
        let starts_parameter = matches!(
            url[..pos].chars().next_back(),
            None | Some('?' | '&' | ';')
        );
        if !starts_parameter {
            return None;
        }

        let rest = &url[pos + VERSION_PARAM.len()..];
        let end = rest
            .find(|c: char| !c.is_ascii_alphanumeric() && c != '.')
            .unwrap_or(rest.len());
        let version = &rest[..end];

        // An empty value carries no information; report "not found" instead.
        (!version.is_empty()).then(|| version.to_string())
    })
}
352
353/// Parse a component string like "slug:version" or "slug"
354pub fn parse_component(s: &str, component_type: ComponentType) -> Result<ComponentInfo> {
355    let parts: Vec<&str> = s.split(':').collect();
356    match parts.len() {
357        1 => Ok(ComponentInfo {
358            component_type,
359            slug: parts[0].trim().to_string(),
360            version: None,
361        }),
362        2 => Ok(ComponentInfo {
363            component_type,
364            slug: parts[0].trim().to_string(),
365            version: Some(parts[1].trim().to_string()),
366        }),
367        _ => match component_type {
368            ComponentType::Plugin => Err(Error::InvalidPluginFormat(s.to_string())),
369            ComponentType::Theme => Err(Error::InvalidThemeFormat(s.to_string())),
370            ComponentType::Core => Err(Error::InvalidPluginFormat(s.to_string())),
371        },
372    }
373}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed absolute URL produces a scanner.
    #[test]
    fn parse_valid_url() {
        assert!(Scanner::new("https://example.com").is_ok());
    }

    /// Text that is not a URL is rejected.
    #[test]
    fn parse_invalid_url() {
        assert!(Scanner::new("not a url").is_err());
    }

    /// "slug:version" yields both parts.
    #[test]
    fn parse_component_with_version() {
        let ComponentInfo { slug, version, .. } =
            parse_component("elementor:3.18.0", ComponentType::Plugin).unwrap();
        assert_eq!(slug, "elementor");
        assert_eq!(version, Some("3.18.0".to_string()));
    }

    /// A bare slug yields no version.
    #[test]
    fn parse_component_without_version() {
        let ComponentInfo { slug, version, .. } =
            parse_component("elementor", ComponentType::Plugin).unwrap();
        assert_eq!(slug, "elementor");
        assert_eq!(version, None);
    }
}