use log::{debug, warn};
#[cfg(feature = "remote-loader")]
use reqwest::Client;
use rswappalyzer_engine::compiled::CompiledBundle;
use rswappalyzer_engine::source::WappalyzerParser;
use rswappalyzer_engine::{CachedTechRule, RuleLibrary, RuleProcessor};
use std::fs;
use std::path::{Path, PathBuf};
use crate::error::{RswError, RswResult};
use crate::{RuleCacheManager, RuleConfig, RuleSource, RuleStage};
/// Entry point for loading rule libraries from embedded, local-file, or remote sources.
///
/// The loader is constructed through `Default` (see `new`); it holds the helper
/// components that the individual `load_*` methods delegate to.
#[derive(Default)]
pub struct RuleLoader {
/// Reads, looks up, and persists ETag records for remote rule sources.
/// Only present when the `remote-loader` feature is enabled.
#[cfg(feature = "remote-loader")]
etag_manager: crate::rule::loader::EtagManager,
/// Queries the remote ETag and downloads the remote rule data.
/// Only present when the `remote-loader` feature is enabled.
#[cfg(feature = "remote-loader")]
remote_fetcher: crate::rule::loader::RemoteRuleFetcher,
/// Cleans and splits freshly parsed rule libraries before they are cached or returned.
rule_processor: RuleProcessor,
}
impl RuleLoader {
pub fn new() -> Self {
Self::default()
}
/// Handles the `RuleSource::Embedded` case.
///
/// No data is read here: the method always succeeds and hands back
/// `RuleLibrary::default()`.
pub fn load_embedded(&self) -> RswResult<RuleLibrary> {
    let library = RuleLibrary::default();
    Ok(library)
}
/// Loads a rule library according to `config.origin` (source + stage).
///
/// Dispatch table:
/// - `Embedded` (any stage): [`Self::load_embedded`].
/// - `LocalFile` + `Raw`: parse the raw file (going through the cache first).
/// - `LocalFile` + `Cached`: read the file as an already-cached rule set.
/// - `RemoteOfficial` / `RemoteCustom` + `Raw`: remote loading.
///
/// # Errors
/// Returns `RswError::RuleConfigError` for the unsupported combinations
/// (`LocalFile` + `Compiled`, and remote sources with `Cached` or `Compiled`),
/// and otherwise propagates whatever the selected loader returns.
pub async fn load(&self, config: &RuleConfig) -> RswResult<RuleLibrary> {
    match &config.origin.source {
        RuleSource::Embedded => self.load_embedded(),
        RuleSource::LocalFile(path) => match config.origin.stage {
            RuleStage::Raw => self.load_local_file(config, path).await,
            RuleStage::Cached => self.load_cached_rule(path).await,
            RuleStage::Compiled => Err(RswError::RuleConfigError(
                "Compiled rules are not supported in the current loading stage.".into(),
            )),
        },
        RuleSource::RemoteOfficial | RuleSource::RemoteCustom(_) => match config.origin.stage {
            RuleStage::Raw => self.load_remote_rules(config).await,
            RuleStage::Cached => Err(RswError::RuleConfigError(
                "Cached stage is only supported for local files, remote rules do not have cached format".into(),
            )),
            RuleStage::Compiled => Err(RswError::RuleConfigError(
                "Remote rules do not support Compiled stage, use Raw stage instead".into(),
            )),
        },
    }
}
/// Reads a precompiled rule bundle from `path` and deserialises it.
///
/// # Errors
/// `RswError::IoError` when the file cannot be read; any error from
/// [`Self::load_compiled_bytes`] when the content cannot be parsed.
pub async fn load_compiled_bundle(&self, path: &Path) -> RswResult<CompiledBundle> {
    debug!("Loading precompiled rules from: {}", path.display());
    match fs::read(path) {
        Ok(bytes) => self.load_compiled_bytes(&bytes),
        Err(io_err) => Err(RswError::IoError(io_err)),
    }
}
/// Deserialises a `CompiledBundle` from JSON-encoded `bytes`.
///
/// # Errors
/// `RswError::RuleLoadError` carrying the serde_json message when parsing fails.
pub fn load_compiled_bytes(&self, bytes: &[u8]) -> RswResult<CompiledBundle> {
    serde_json::from_slice::<CompiledBundle>(bytes).map_err(|parse_err| {
        RswError::RuleLoadError(format!("Failed to parse compiled bytes: {}", parse_err))
    })
}
/// Builds a `RuleLibrary` from JSON-encoded cached rules.
///
/// `bytes` is parsed as a list of `CachedTechRule` and then converted via
/// `RuleCacheManager::convert_cached_rules`.
///
/// # Errors
/// `RswError::RuleLoadError` when the JSON cannot be parsed; conversion errors
/// are propagated.
pub fn load_cached_rule_bytes(&self, bytes: &[u8]) -> RswResult<RuleLibrary> {
    let parsed: Result<Vec<CachedTechRule>, _> = serde_json::from_slice(bytes);
    let cached_rules = match parsed {
        Ok(rules) => rules,
        Err(parse_err) => {
            return Err(RswError::RuleLoadError(format!(
                "Failed to parse cached bytes: {}",
                parse_err
            )));
        }
    };
    let library = RuleCacheManager::convert_cached_rules(cached_rules)?;
    Ok(library)
}
/// Reads a cached rule file from `path` and converts it into a `RuleLibrary`.
///
/// # Errors
/// `RswError::IoError` when the file cannot be read; otherwise whatever
/// [`Self::load_cached_rule_bytes`] reports.
pub async fn load_cached_rule(&self, path: &PathBuf) -> RswResult<RuleLibrary> {
    fs::read(path)
        .map_err(RswError::IoError)
        .and_then(|bytes| self.load_cached_rule_bytes(&bytes))
}
/// Best-effort cache read: returns the cached library, or `None` on any failure.
///
/// A "not found" failure is logged at debug level, every other failure at warn
/// level; neither is surfaced to the caller.
async fn load_from_cache_unified(&self, config: &RuleConfig) -> Option<RuleLibrary> {
    let cache_path = config.get_cache_file_path();
    let load_err = match RuleCacheManager::load_from_cache(config) {
        Ok(library) => {
            debug!("Loaded rules from cache: {}", cache_path.display());
            return Some(library);
        }
        Err(err) => err,
    };

    if load_err.is_not_found() {
        debug!("Cache not found: {}", cache_path.display());
    } else {
        warn!(
            "Failed to load rules from cache: {} - {}",
            cache_path.display(),
            load_err
        );
    }
    None
}
/// Best-effort cache write: persists `rule_lib` and only logs the outcome.
///
/// A failed write is reported at warn level and otherwise ignored, so callers
/// can still use the library they just built.
async fn save_to_cache_unified(&self, config: &RuleConfig, rule_lib: &RuleLibrary) {
    let cache_path = config.get_cache_file_path();
    match RuleCacheManager::save_to_cache(config, rule_lib) {
        Ok(_) => {
            debug!("Rules cached successfully to: {}", cache_path.display());
        }
        Err(save_err) => {
            warn!("Failed to cache rules: {} - {}", cache_path.display(), save_err);
        }
    }
}
/// Loads a raw local rule file, preferring an existing cache entry.
///
/// On a cache hit the cached library is returned untouched. Otherwise the file
/// at `path` is read as text, parsed with `WappalyzerParser`, cleaned by the
/// rule processor, written to the cache (best effort) and returned.
///
/// # Errors
/// `RswError::IoError` when the file cannot be read, `RswError::RuleLoadError`
/// when parsing fails, and any error raised while cleaning the rules.
async fn load_local_file(&self, config: &RuleConfig, path: &Path) -> RswResult<RuleLibrary> {
    let cache_hit = self.load_from_cache_unified(config).await;
    if let Some(library) = cache_hit {
        return Ok(library);
    }

    warn!("Local cache not found, reading raw rule file: {:?}", path);
    debug!("Loading raw local rules from: {}", path.display());

    let rule_text = fs::read_to_string(path).map_err(RswError::IoError)?;
    let parser = WappalyzerParser::default();
    let raw_lib = match parser.parse_to_rule_lib(&rule_text) {
        Ok(parsed) => parsed,
        Err(parse_err) => {
            return Err(RswError::RuleLoadError(format!(
                "Failed to parse rules: {}",
                parse_err
            )));
        }
    };

    let processed = self.rule_processor.clean_and_split_rules(&raw_lib)?;
    self.save_to_cache_unified(config, &processed).await;
    Ok(processed)
}
/// Loads rules from a remote source, using the local cache and ETag records to
/// avoid unnecessary downloads.
///
/// Flow:
/// 1. Resolve the URL and ETag identifier from `config.origin.source`.
/// 2. Try the local cache. If it exists and `check_update` is off, return it
///    without touching the network.
/// 3. With `check_update` on, compare the remote ETag with the stored record.
///    If they match and the cache was loaded in step 2, return that library.
/// 4. Otherwise download the rules, clean them, write the cache (best effort)
///    and, when a remote ETag is known, persist the new ETag record.
///
/// A matching ETag with a missing/unreadable cache falls back to a full
/// download instead of failing, so a deleted cache file heals itself.
///
/// # Errors
/// `RswError::RuleLoadError` when `remote_options` is missing, the source is not
/// remote, or the HTTP client cannot be built; errors from the ETag manager,
/// the remote fetcher and the rule processor are propagated.
#[cfg(feature = "remote-loader")]
async fn load_remote_rules(&self, config: &RuleConfig) -> RswResult<RuleLibrary> {
    let remote_opts = config.remote_options.as_ref().ok_or_else(|| {
        RswError::RuleLoadError("Missing remote network configuration".into())
    })?;

    // `config.origin` carries both `source` and `stage`; the endpoint is chosen
    // by `source`, whose variants belong to `RuleSource`.
    let (remote_url, source_identifier) = match &config.origin.source {
        RuleSource::RemoteOfficial => (
            "https://raw.githubusercontent.com/projectdiscovery/wappalyzergo/refs/heads/main/fingerprints_data.json",
            "wappalyzergo_official",
        ),
        RuleSource::RemoteCustom(custom_url) => (custom_url.as_str(), "wappalyzer_custom"),
        RuleSource::Embedded | RuleSource::LocalFile(_) => {
            return Err(RswError::RuleLoadError("Not a remote rule source".into()));
        }
    };

    // Keep the cached library around so an ETag match can reuse it instead of
    // reading the cache file a second time.
    let cached_lib = match self.load_from_cache_unified(config).await {
        Some(lib) if !config.options.check_update => {
            debug!("check_update is false and cache exists, skip all network requests");
            return Ok(lib);
        }
        Some(lib) => {
            debug!("check_update is true, proceed to ETag check");
            Some(lib)
        }
        None => {
            warn!("Cache not found, need to fetch remote rules completely");
            None
        }
    };

    let client = Client::builder()
        .timeout(remote_opts.timeout)
        .build()
        .map_err(|e| RswError::RuleLoadError(format!("Failed to build HTTP client: {}", e)))?;

    // `Some((records, etag))` means "after downloading, store this ETag".
    let pending_etag = if config.options.check_update {
        let etag_records = self.etag_manager.load_etag_records(config)?;
        let remote_etag = self
            .remote_fetcher
            .get_remote_etag(&client, remote_url, &remote_opts.retry)
            .await?;
        match remote_etag {
            None => {
                warn!("Remote ETag not found, force fetching latest rules");
                None
            }
            Some(etag) => {
                let local_etag_record = self
                    .etag_manager
                    .find_local_etag(config, source_identifier)?;
                if self
                    .remote_fetcher
                    .should_use_local_file(&local_etag_record, &etag)
                {
                    if let Some(lib) = cached_lib {
                        debug!("Rule library is up-to-date, using local cache");
                        return Ok(lib);
                    }
                    // The ETag record outlived the cache file; re-download
                    // rather than failing on every call until the ETag changes.
                    warn!("Local cache missing but ETag matches, fetching remote rules again");
                } else {
                    debug!("New rule library detected, fetching remote rules");
                }
                Some((etag_records, etag))
            }
        }
    } else {
        debug!("check_update is false, fetch full rules without ETag check");
        None
    };

    let raw_lib = self
        .remote_fetcher
        .fetch_wappalyzer_rules(&client, remote_url, &remote_opts.retry)
        .await?;
    let cleaned_lib = self.rule_processor.clean_and_split_rules(&raw_lib)?;
    self.save_to_cache_unified(config, &cleaned_lib).await;

    if let Some((mut etag_records, etag)) = pending_etag {
        self.etag_manager.upsert_and_save_etag(
            config,
            &mut etag_records,
            source_identifier,
            etag,
            config.get_cache_file_path().to_string_lossy().to_string(),
        )?;
    }

    Ok(cleaned_lib)
}
/// Stand-in used when the crate is built without the `remote-loader` feature.
///
/// # Errors
/// Always returns `RswError::RuleLoadError` telling the caller to enable the
/// feature.
#[cfg(not(feature = "remote-loader"))]
async fn load_remote_rules(&self, _config: &RuleConfig) -> RswResult<RuleLibrary> {
    let reason = "Please enable 'remote-loader' feature to load remote rules";
    Err(RswError::RuleLoadError(reason.into()))
}
/// Forwards `rule_lib` to the rule processor's `debug_count_script_rules`.
///
/// The loader adds nothing of its own; this only exposes the processor's
/// diagnostic helper to callers.
pub fn debug_count_script_rules(&self, rule_lib: &RuleLibrary) {
    let processor = &self.rule_processor;
    processor.debug_count_script_rules(rule_lib);
}
}
/// Lets `?` turn a failed tokio task join into a crate error.
///
/// The join error's `Display` output is embedded in
/// `RswError::AsyncTaskError`.
#[cfg(feature = "remote-loader")]
impl From<tokio::task::JoinError> for RswError {
    fn from(join_err: tokio::task::JoinError) -> Self {
        let message = format!("Async task failed: {}", join_err);
        RswError::AsyncTaskError(message)
    }
}