1use super::backend;
8use super::error::EvalError;
9use super::source::{CleanStep, Rule};
10use super::transform;
11use fancy_regex::Regex;
12use std::collections::HashMap;
13use std::sync::LazyLock;
14
15pub type Vars = HashMap<String, String>;
17
18pub fn eval_value(rule: &Rule, ctx: &str, vars: &Vars) -> Result<String, EvalError> {
20 match rule {
21 Rule::Literal { literal } => Ok(literal.clone()),
22 Rule::Template { template } => Ok(interpolate(template, vars)),
23 Rule::FirstOf { first_of } => {
24 for r in first_of {
25 let v = eval_value(r, ctx, vars)?;
26 if !v.trim().is_empty() {
27 return Ok(v);
28 }
29 }
30 Ok(String::new())
31 }
32 Rule::Concat { concat, join } => {
33 let mut parts = Vec::new();
34 for r in concat {
35 let v = eval_value(r, ctx, vars)?;
36 if !v.trim().is_empty() {
37 parts.push(v);
38 }
39 }
40 Ok(parts.join(join))
41 }
42 Rule::Js { js } => run_js(js, ctx, vars),
43 Rule::Leaf(l) => {
44 let raw = backend::extract(l.via, ctx, l.select.as_deref(), l.index, &l.extract)?;
45 apply_clean(raw, &l.clean, vars)
46 }
47 }
48}
49
50fn run_js(script: &str, result: &str, vars: &Vars) -> Result<String, EvalError> {
53 #[cfg(feature = "js")]
54 {
55 crate::js::eval_js(script, result, vars)
56 }
57 #[cfg(not(feature = "js"))]
58 {
59 let _ = (script, result, vars);
60 Err(EvalError::Unsupported("js"))
61 }
62}
63
64pub fn eval_list(rule: &Rule, ctx: &str) -> Result<Vec<String>, EvalError> {
66 match rule {
67 Rule::Leaf(l) => match l.select.as_deref() {
68 Some(sel) => backend::select_all(l.via, ctx, sel),
69 None => Ok(vec![ctx.to_string()]),
71 },
72 Rule::FirstOf { first_of } => {
73 for r in first_of {
74 let v = eval_list(r, ctx)?;
75 if !v.is_empty() {
76 return Ok(v);
77 }
78 }
79 Ok(Vec::new())
80 }
81 other => {
83 let v = eval_value(other, ctx, &Vars::new())?;
84 Ok(if v.is_empty() { Vec::new() } else { vec![v] })
85 }
86 }
87}
88
89fn apply_clean(mut s: String, steps: &[CleanStep], vars: &Vars) -> Result<String, EvalError> {
93 for step in steps {
94 if let Some(pat) = &step.regex {
95 let re = Regex::new(pat).map_err(|e| EvalError::Regex(e.to_string()))?;
97 let rep = step.replace.as_deref().unwrap_or("");
98 s = re.replace_all(&s, rep).into_owned();
99 }
100 if step.trim.unwrap_or(false) {
101 s = s.trim().to_string();
102 }
103 if let Some(p) = &step.prepend {
104 s = format!("{p}{s}");
105 }
106 if let Some(a) = &step.append {
107 s = format!("{s}{a}");
108 }
109 if let Some(c) = step.decode {
110 s = transform::decode(&s, c)?;
111 }
112 if let Some(c) = step.encode {
113 s = transform::encode(&s, c)?;
114 }
115 if let Some(h) = &step.hash {
116 s = transform::hash(&s, h)?;
117 }
118 if let Some(c) = &step.cipher {
119 s = transform::cipher(&s, c)?;
120 }
121 if let Some(table) = &step.font_map {
122 s = transform::font_map(&s, table)?;
123 }
124 if let Some(cn) = step.cn {
125 s = transform::cn_convert(&s, cn);
126 }
127 if let Some(js) = &step.js {
128 s = run_js(js, &s, vars)?;
129 }
130 }
131 Ok(s)
132}
133
134pub(crate) fn interpolate(template: &str, vars: &Vars) -> String {
136 static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\{\s*([\w.\-]+)\s*\}\}").unwrap());
137 RE.replace_all(template, |c: &fancy_regex::Captures| {
138 c.get(1)
139 .and_then(|m| vars.get(m.as_str()))
140 .cloned()
141 .unwrap_or_default()
142 })
143 .into_owned()
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::Rule;

    /// Parses a rule from its JSON form; panics when the JSON is not a valid rule.
    fn rule(j: &str) -> Rule {
        serde_json::from_str(j).expect("rule json")
    }

    /// Table-of-contents fixture: a bookmark heading nested in a `span`, two
    /// volume headings, and three chapter links.
    const CATALOG: &str = r#"<html><body>
        <div class="box">
        <span id="shuqian"><h2 class="module-title type">阅读进度</h2></span>
        <h2 class="module-title type">第一卷 魔性不改</h2>
        <div class="module-row-info"><a class="module-row-text" href="/n/1.html"><i></i><div class="module-row-title"><span>第一章 甲</span></div></a></div>
        <div class="module-row-info"><a class="module-row-text" href="/n/2.html"><i></i><div class="module-row-title"><span>第二章 乙</span></div></a></div>
        <h2 class="module-title type">第二卷 魔子出山</h2>
        <div class="module-row-info"><a class="module-row-text" href="/n/3.html"><i></i><div class="module-row-title"><span>第三章 丙</span></div></a></div>
        </div>
        </body></html>"#;

    /// List rule selecting direct-child volume headings together with chapter links.
    fn toc_list() -> Rule {
        rule(r#"{"via":"css","select":".box > h2.module-title.type, .box a.module-row-text"}"#)
    }

    /// The list rule yields the two volumes and three chapters, and skips the
    /// heading nested inside the `span`.
    #[test]
    fn list_selects_volumes_and_chapters_in_document_order() {
        let selected = eval_list(&toc_list(), CATALOG).unwrap();
        assert_eq!(selected.len(), 5, "2 卷 + 3 章 = 5(排除 span 内的阅读进度)");
    }

    /// Each list item is classified as a volume (it has an `h2`) or a chapter
    /// (no `h2`), and chapters get their title and link extracted.
    #[test]
    fn toc_rules_split_into_volumes_and_chapters() {
        let name_rule = rule(
            r#"{"firstOf":[{"via":"css","select":".module-row-title","extract":"text"},{"via":"css","select":"h2","extract":"text"}]}"#,
        );
        let url_rule = rule(r#"{"via":"css","select":"a","extract":{"attr":"href"}}"#);
        let volume_probe = rule(r#"{"via":"css","select":"h2","extract":"text"}"#);
        let no_vars = Vars::new();

        let mut volumes = Vec::new();
        let mut chapters = Vec::new();
        for item in eval_list(&toc_list(), CATALOG).unwrap() {
            let title = eval_value(&name_rule, &item, &no_vars).unwrap();
            let heading = eval_value(&volume_probe, &item, &no_vars).unwrap();
            if heading.trim().is_empty() {
                // No heading text: this item is a chapter link.
                let href = eval_value(&url_rule, &item, &no_vars).unwrap();
                chapters.push((title, href));
            } else {
                volumes.push(title);
            }
        }

        assert_eq!(volumes, vec!["第一卷 魔性不改", "第二卷 魔子出山"]);
        assert_eq!(chapters.len(), 3);

        let expected_first = ("第一章 甲".to_string(), "/n/1.html".to_string());
        assert_eq!(chapters[0], expected_first);

        let expected_last = ("第三章 丙".to_string(), "/n/3.html".to_string());
        assert_eq!(chapters[2], expected_last);
    }

    /// An attribute extract reads the `content` of an Open Graph `meta` tag.
    #[test]
    fn book_info_extracts_og_meta_attr() {
        let html = r#"<head><meta property="og:novel:book_name" content="蛊真人"><meta property="og:image" content="https://x/c.jpg"></head>"#;
        let name_rule = rule(
            r#"{"via":"css","select":"[property=\"og:novel:book_name\"]","extract":{"attr":"content"}}"#,
        );
        let extracted = eval_value(&name_rule, html, &Vars::new()).unwrap();
        assert_eq!(extracted, "蛊真人");
    }

    /// The `html` extract keeps both paragraphs and separates them with a newline.
    #[test]
    fn content_html_extract_cleans_paragraphs() {
        let html = r#"<div class="article-content"><p>第一段。</p><p>第二段。</p></div>"#;
        let content_rule = rule(
            r#"{"via":"css","select":".article-content","extract":"html","clean":[{"trim":true}]}"#,
        );
        let body = eval_value(&content_rule, html, &Vars::new()).unwrap();
        assert!(body.contains("第一段。"));
        assert!(body.contains("第二段。"));
        assert!(body.contains('\n'), "段落间应有换行");
    }

    /// An inline `fontMap` table maps private-use code points back to real
    /// characters and leaves unmapped characters alone.
    #[test]
    fn clean_font_map_restores_via_inline_table() {
        let font_rule = rule(r#"{"via":"raw","clean":[{"fontMap":{"E001":"甲","E002":"乙"}}]}"#);
        let restored = eval_value(&font_rule, "\u{E001}\u{E002}!", &Vars::new()).unwrap();
        assert_eq!(restored, "甲乙!");
    }

    /// Template placeholders are filled from the variable map.
    #[test]
    fn template_interpolates_vars() {
        let template_rule = rule(r#"{"template":"{{base}}/search?q={{key}}&pg={{page}}"}"#);
        let mut vars = Vars::new();
        vars.insert("base".into(), "https://x.com".into());
        vars.insert("key".into(), "蛊真人".into());
        vars.insert("page".into(), "2".into());

        let url = eval_value(&template_rule, "", &vars).unwrap();
        assert_eq!(url, "https://x.com/search?q=蛊真人&pg=2");
    }

    /// `firstOf` moves on to the second alternative when the first one matches nothing.
    #[test]
    fn firstof_falls_back_to_second_when_first_empty() {
        let fallback_rule = rule(
            r#"{"firstOf":[{"via":"css","select":".nope","extract":"text"},{"via":"css","select":"h2","extract":"text"}]}"#,
        );
        let html = r#"<h2>标题</h2>"#;
        let title = eval_value(&fallback_rule, html, &Vars::new()).unwrap();
        assert_eq!(title, "标题");
    }

    /// A regex clean step removes trailing boilerplate, and the following trim
    /// step removes the leftover whitespace.
    #[test]
    fn clean_regex_replace_strips_boilerplate() {
        let strip_rule = rule(
            r#"{"via":"raw","clean":[{"regex":"请收藏本站[^\\n]*","replace":""},{"trim":true}]}"#,
        );
        let cleaned = eval_value(&strip_rule, "正文内容 请收藏本站xxx.com", &Vars::new()).unwrap();
        assert_eq!(cleaned, "正文内容");
    }

    /// Round trip: text encrypted with AES-CBC through `transform::cipher` is
    /// recovered by a cipher clean step configured with the same key and IV.
    #[test]
    fn clean_pipeline_decrypts_content() {
        use crate::source::{ByteEnc, CipherAlgo, CipherMode, CipherOp, CipherStep, Padding};

        let plain = "蛊真人 第一章 正文……";
        let encrypt_step = CipherStep {
            algo: CipherAlgo::Aes,
            mode: CipherMode::Cbc,
            padding: Padding::Pkcs7,
            op: CipherOp::Encrypt,
            key: "0123456789abcdef".into(),
            key_enc: ByteEnc::Utf8,
            iv: Some("abcdef9876543210".into()),
            iv_enc: ByteEnc::Utf8,
            input_enc: Some(ByteEnc::Utf8),
            output_enc: Some(ByteEnc::Base64),
        };
        let ciphertext = transform::cipher(plain, &encrypt_step).unwrap();

        // The rule JSON names only algo, mode, key and iv; the remaining
        // cipher fields take whatever defaults deserialization provides.
        let decrypt_rule = rule(
            r#"{"via":"raw","clean":[{"cipher":{"algo":"aes","mode":"cbc","key":"0123456789abcdef","iv":"abcdef9876543210"}}]}"#,
        );
        let recovered = eval_value(&decrypt_rule, &ciphertext, &Vars::new()).unwrap();
        assert_eq!(recovered, plain);
    }

    /// A failing cipher step surfaces as a codec or crypto error rather than
    /// being swallowed.
    #[test]
    fn clean_cipher_error_propagates() {
        let decrypt_rule = rule(
            r#"{"via":"raw","clean":[{"cipher":{"algo":"aes","mode":"cbc","key":"0123456789abcdef","iv":"abcdef9876543210"}}]}"#,
        );
        let outcome = eval_value(&decrypt_rule, "!!!not-base64!!!", &Vars::new());
        assert!(matches!(
            outcome,
            Err(EvalError::Codec(_) | EvalError::Crypto(_))
        ));
    }

    /// JS rules and JS clean steps deserialize whether or not the `js`
    /// feature is compiled in.
    #[test]
    fn js_rule_parses_regardless_of_feature() {
        let top_level = rule(r#"{"js":"result + '!'"}"#);
        assert!(matches!(top_level, Rule::Js { .. }));

        let leaf_with_js = rule(r#"{"via":"raw","clean":[{"js":"result"}]}"#);
        assert!(matches!(leaf_with_js, Rule::Leaf(_)));
    }

    /// Without the `js` feature, evaluating a JS rule reports `Unsupported("js")`.
    #[cfg(not(feature = "js"))]
    #[test]
    fn js_rule_unsupported_without_feature() {
        let js_rule = rule(r#"{"js":"result + '!'"}"#);
        let outcome = eval_value(&js_rule, "x", &Vars::new());
        assert!(matches!(outcome, Err(EvalError::Unsupported("js"))));
    }

    /// With the `js` feature, both a top-level JS rule and a JS clean step run
    /// with the current value bound to `result`.
    #[cfg(feature = "js")]
    #[test]
    fn js_rule_evaluates_with_feature() {
        let top_level = rule(r#"{"js":"result + '!'"}"#);
        assert_eq!(eval_value(&top_level, "x", &Vars::new()).unwrap(), "x!");

        let clean_step = rule(r#"{"via":"raw","clean":[{"js":"result.toUpperCase()"}]}"#);
        assert_eq!(eval_value(&clean_step, "abc", &Vars::new()).unwrap(), "ABC");
    }
}