1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
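//! Remote rule fetcher module.
//!
//! Utilities for pulling rule data from a remote location.
//! Key characteristics:
//! 1. Fully asynchronous design (no `block_on`; built on the tokio async runtime).
//! 2. Configurable retry policy (`Never` / `Times(n)`).
//! 3. ETag-based cache control (weak ETags are parsed: the `W/` prefix and the surrounding quotes are stripped).
//! 4. Feature-gated compilation (the `remote-loader` feature switches the functionality on or off).
//! 5. Robust error handling (detailed error context and readable log messages).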
use crate::error::{RswResult, RswError};
use crate::rule::loader::ETagRecord;
#[cfg(feature = "remote-loader")]
use reqwest::Client;
use rswappalyzer_engine::RuleLibrary;
use std::path::Path;
/// Stateless helper for fetching remote rule data.
///
/// The type has no fields. Its methods cover downloading the rule library,
/// reading a remote `ETag`, and the retry loop those two operations share.
/// Because it is a unit struct it is trivially `Copy`, and `Debug` is derived
/// so it can be embedded in other types that derive `Debug`.
#[derive(Debug, Clone, Copy, Default)]
pub struct RemoteRuleFetcher;
impl RemoteRuleFetcher {
/// Runs an async operation and retries it after a failure.
///
/// The operation is attempted once and then up to `max_retries` more times.
/// Between attempts the task sleeps for a fixed one second; there is no
/// exponential backoff. The error of every attempt that is going to be
/// retried is logged at `warn` level, and the error of the final attempt is
/// returned to the caller unchanged.
///
/// # Parameters
/// - `max_retries`: number of retries after the first attempt (`0` means a
///   single attempt and no retry).
/// - `func`: closure that builds a fresh future for each attempt.
///
/// # Errors
/// Returns the error produced by the last attempt when every attempt fails.
#[cfg(feature = "remote-loader")]
async fn simple_retry<F, Fut, T>(&self, max_retries: usize, mut func: F) -> RswResult<T>
where
    // `func` is invoked once per attempt and must hand back a `Send + 'static` future.
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = RswResult<T>> + Send + 'static,
{
    /// Fixed pause between two consecutive attempts.
    const RETRY_DELAY: std::time::Duration = std::time::Duration::from_secs(1);

    let mut retries_used = 0;
    loop {
        match func().await {
            Ok(value) => return Ok(value),
            // Retry budget is spent: surface this last error as-is.
            Err(err) if retries_used >= max_retries => return Err(err),
            Err(err) => {
                retries_used += 1;
                // Include the error so intermediate failures are not lost.
                log::warn!(
                    "Request failed, retrying (attempt {}/{}): {}",
                    retries_used,
                    max_retries,
                    err
                );
                tokio::time::sleep(RETRY_DELAY).await;
            }
        }
    }
}
/// Fetches the `ETag` of a remote resource with a `HEAD` request.
///
/// The raw header value is normalised by removing a leading weak-validator
/// marker (`W/`) and the surrounding double quotes.
///
/// This is a best-effort lookup: every failure is logged at `warn` level and
/// reported as `Ok(None)`, so this function itself never returns `Err`.
/// Transport errors, non-success status codes and unreadable header values
/// are retried according to `retry_policy`. A successful response that
/// carries no `ETag` header is *not* retried, since the request itself
/// succeeded; it yields `Ok(None)` immediately.
///
/// # Parameters
/// - `client`: reqwest async client used to send the request.
/// - `url`: URL of the remote resource.
/// - `retry_policy`: `Never` (single attempt) or `Times(n)` (up to `n` retries).
///
/// # Returns
/// `Ok(Some(etag))` with the cleaned value, or `Ok(None)` when no ETag could
/// be obtained.
#[cfg(feature = "remote-loader")]
pub async fn get_remote_etag(
    &self,
    client: &Client,
    url: &str,
    retry_policy: &crate::RetryPolicy,
) -> RswResult<Option<String>> {
    // Translate the policy into a retry count for the shared retry loop.
    let max_retries = match retry_policy {
        crate::RetryPolicy::Never => 0,
        crate::RetryPolicy::Times(n) => *n as usize,
    };

    // Every attempt receives its own owned client handle and URL so that the
    // produced future satisfies the `'static` bound of `simple_retry`.
    let outcome = self
        .simple_retry(max_retries, || {
            Self::request_etag_once(client.clone(), url.to_owned())
        })
        .await;

    // Failures are downgraded to `None` on purpose: the caller can still
    // proceed without an ETag.
    match outcome {
        Ok(Some(etag)) => {
            log::debug!("Successfully fetched ETag for URL [{}]: {}", url, etag);
            Ok(Some(etag))
        }
        Ok(None) => {
            log::warn!("URL {} did not return ETag header", url);
            Ok(None)
        }
        Err(e) => {
            log::warn!("Failed to fetch ETag for URL [{}]: {}", url, e);
            Ok(None)
        }
    }
}

/// Performs a single `HEAD` request and extracts the cleaned `ETag`.
///
/// Returns `Ok(None)` when the response is successful but has no `ETag`
/// header, and `Err` for transport failures, non-success status codes, or a
/// header value that cannot be read as a string.
#[cfg(feature = "remote-loader")]
async fn request_etag_once(client: Client, url: String) -> RswResult<Option<String>> {
    let response = client
        .head(&url)
        .header("User-Agent", "Rswappalyzer/0.1.0")
        .send()
        .await
        .map_err(|e| RswError::RuleLoadError(format!("Failed to request ETag: {:#?}", e)))?;

    let status = response.status();
    if !status.is_success() {
        return Err(RswError::RuleLoadError(format!(
            "Failed to get ETag: URL {} returned status code {}",
            url, status
        )));
    }

    // A missing header is an answer, not a failure worth retrying.
    let header = match response.headers().get(reqwest::header::ETAG) {
        Some(value) => value,
        None => return Ok(None),
    };

    let raw = header.to_str().map_err(|e| {
        RswError::RuleLoadError(format!("Failed to convert ETag to string: {}", e))
    })?;

    // Strip the weak marker first, then the quotes around the opaque tag.
    let cleaned = raw.trim_start_matches("W/").trim_matches('"').to_string();
    Ok(Some(cleaned))
}
/// Downloads the remote Wappalyzer rule set and converts it to a `RuleLibrary`.
///
/// Each attempt sends a `GET` request, reads the whole body, parses it with
/// `WappalyzerParser::parse_from_bytes` and converts the result with
/// `convert_original_to_rule_lib`. Failed attempts are retried according to
/// `retry_policy`. On success the number of entries in `core_tech_map` is
/// logged at `debug` level.
///
/// # Parameters
/// - `client`: reqwest async client used to send the request.
/// - `url`: URL of the remote rule set.
/// - `retry_policy`: `Never` (single attempt) or `Times(n)` (up to `n` retries).
///
/// # Errors
/// Returns `RswError::RuleLoadError` from the last attempt when the request
/// fails, the status code is not a success, the body cannot be read, or the
/// payload cannot be parsed.
#[cfg(feature = "remote-loader")]
pub async fn fetch_wappalyzer_rules(
    &self,
    client: &Client,
    url: &str,
    retry_policy: &crate::RetryPolicy,
) -> RswResult<RuleLibrary> {
    // Translate the policy into a retry count for the shared retry loop.
    let max_retries = match retry_policy {
        crate::RetryPolicy::Never => 0,
        crate::RetryPolicy::Times(n) => *n as usize,
    };

    // Every attempt receives its own owned client handle and URL so that the
    // produced future satisfies the `'static` bound of `simple_retry`.
    let rule_lib = self
        .simple_retry(max_retries, || {
            Self::download_rules_once(client.clone(), url.to_owned())
        })
        .await?;

    log::debug!(
        "Successfully fetched Wappalyzer rules, total tech rules: {}",
        rule_lib.core_tech_map.len()
    );
    Ok(rule_lib)
}

/// Performs a single download-and-parse attempt for the rule set.
///
/// No `Accept-Encoding` header is set by hand. Per the reqwest documentation,
/// the client adds that header itself for the encodings it is configured to
/// decode, and only decodes those. Advertising `gzip, deflate` manually could
/// hand a still-compressed body to the parser.
/// NOTE(review): confirm which reqwest compression features this crate enables.
#[cfg(feature = "remote-loader")]
async fn download_rules_once(client: Client, url: String) -> RswResult<RuleLibrary> {
    use rswappalyzer_engine::source::{
        wappalyzer::WappalyzerOriginalRuleLibrary, WappalyzerParser,
    };

    let response = client
        .get(&url)
        .header("User-Agent", "Rswappalyzer/0.1.0")
        .send()
        .await
        .map_err(|e| RswError::RuleLoadError(format!("Failed to fetch rules: {:#?}", e)))?;

    let status = response.status();
    if !status.is_success() {
        return Err(RswError::RuleLoadError(format!(
            "Failed to fetch rules: URL {} returned status code {}",
            url, status
        )));
    }

    // Read the complete body before parsing.
    let bytes = response.bytes().await.map_err(|e| {
        RswError::RuleLoadError(format!("Failed to read response bytes: {}", e))
    })?;

    // Parse the upstream format, then convert it to the engine's library type.
    let parser = WappalyzerParser::default();
    let original_lib: WappalyzerOriginalRuleLibrary =
        parser.parse_from_bytes(&bytes).map_err(|e| {
            RswError::RuleLoadError(format!("Failed to parse original rules: {}", e))
        })?;

    Ok(parser.convert_original_to_rule_lib(original_lib))
}
/// Decides whether the locally cached rule file can be reused.
///
/// Returns `true` only when all of the following hold:
/// 1. a local ETag record is present,
/// 2. its `etag` equals `remote_etag`,
/// 3. the path stored in its `local_file_path` exists.
///
/// # Parameters
/// - `local_record`: the local ETag record, if any.
/// - `remote_etag`: the ETag reported by the remote side.
pub fn should_use_local_file(
    &self,
    local_record: &Option<ETagRecord>,
    remote_etag: &str,
) -> bool {
    match local_record {
        // Without a record there is nothing to compare against.
        None => false,
        Some(record) => {
            let etag_matches = record.etag == remote_etag;
            // The file system is only consulted when the ETags agree.
            etag_matches && Path::new(&record.local_file_path).exists()
        }
    }
}
/// Stand-in for the ETag lookup, compiled when the `remote-loader` feature is
/// disabled.
///
/// All arguments are ignored and the call always fails with a
/// `RswError::RuleLoadError` stating that the feature is not enabled.
#[cfg(not(feature = "remote-loader"))]
pub async fn get_remote_etag(
    &self,
    _client: &(), // unit placeholder in the client position; the original author notes this variant is not expected to be called
    _url: &str,
    _retry_policy: &crate::RetryPolicy,
) -> RswResult<Option<String>> {
    let reason = String::from("remote-loader feature is not enabled");
    Err(RswError::RuleLoadError(reason))
}
/// Stand-in for the rule download, compiled when the `remote-loader` feature
/// is disabled.
///
/// All arguments are ignored and the call always fails with a
/// `RswError::RuleLoadError` stating that the feature is not enabled.
#[cfg(not(feature = "remote-loader"))]
pub async fn fetch_wappalyzer_rules(
    &self,
    _client: &(), // unit placeholder in the client position; the original author notes this variant is not expected to be called
    _url: &str,
    _retry_policy: &crate::RetryPolicy,
) -> RswResult<RuleLibrary> {
    let reason = String::from("remote-loader feature is not enabled");
    Err(RswError::RuleLoadError(reason))
}
}