1use super::cookie::{
5 CookieJar, merge_login_into_headers, registrable_domain, request_registrable_domain,
6};
7use super::error::{BookSourceError, Result};
8use super::eval::{Vars, eval_list, eval_value, interpolate};
9use super::fetch::{FetchRequest, Fetcher, ReqwestFetcher};
10use super::model::{BookInfo, BookListItem, Chapter, Toc, Volume};
11use super::source::{
12 BookRules, BookSource, Capture, Category, Method, PreStep, Rule, UrlOrRule, VarScope,
13};
14use std::collections::{BTreeMap, HashMap};
15use std::sync::{Arc, RwLock};
16
17#[derive(Clone)]
19pub struct Engine {
20 source: Arc<BookSource>,
21 fetcher: Arc<dyn Fetcher>,
22 login_header: BTreeMap<String, String>,
26 cookies: Arc<RwLock<CookieJar>>,
29 source_vars: Arc<RwLock<BTreeMap<String, String>>>,
32 book_vars: Arc<RwLock<BTreeMap<String, String>>>,
35}
36
37impl std::fmt::Debug for Engine {
38 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39 f.debug_struct("Engine")
40 .field("source", &self.source.name)
41 .finish_non_exhaustive()
42 }
43}
44
45impl Engine {
46 pub fn new(source: BookSource) -> Result<Self> {
48 let fetcher = Arc::new(ReqwestFetcher::new(&source)?);
49 Ok(Self::with_fetcher(source, fetcher))
50 }
51
52 pub fn with_fetcher(source: BookSource, fetcher: Arc<dyn Fetcher>) -> Self {
56 Self {
57 source: Arc::new(source),
58 fetcher,
59 login_header: BTreeMap::new(),
60 cookies: Arc::new(RwLock::new(CookieJar::default())),
61 source_vars: Arc::new(RwLock::new(BTreeMap::new())),
62 book_vars: Arc::new(RwLock::new(BTreeMap::new())),
63 }
64 }
65
66 #[must_use]
69 pub fn with_login_header(mut self, login_header: BTreeMap<String, String>) -> Self {
70 self.login_header = login_header;
71 self
72 }
73
74 #[must_use]
76 pub fn with_cookies(self, persistent: &BTreeMap<String, String>) -> Self {
77 if let Ok(mut jar) = self.cookies.write() {
78 *jar = CookieJar::from_persistent(persistent);
79 }
80 self
81 }
82
83 pub fn persistent_cookies(&self) -> BTreeMap<String, String> {
86 self.cookies
87 .read()
88 .map(|j| j.persistent())
89 .unwrap_or_default()
90 }
91
92 #[must_use]
94 pub fn with_book_vars(self, book_vars: BTreeMap<String, String>) -> Self {
95 if let Ok(mut g) = self.book_vars.write() {
96 *g = book_vars;
97 }
98 self
99 }
100
101 #[must_use]
103 pub fn with_source_vars(self, source_vars: &BTreeMap<String, String>) -> Self {
104 if let Ok(mut g) = self.source_vars.write() {
105 for (k, v) in source_vars {
106 g.insert(k.clone(), v.clone());
107 }
108 }
109 self
110 }
111
112 pub fn book_vars(&self) -> BTreeMap<String, String> {
114 self.book_vars.read().map(|g| g.clone()).unwrap_or_default()
115 }
116
117 pub fn source_vars(&self) -> BTreeMap<String, String> {
119 self.source_vars
120 .read()
121 .map(|g| g.clone())
122 .unwrap_or_default()
123 }
124
125 #[cfg(feature = "browser")]
129 pub fn with_browser_assist(
130 source: BookSource,
131 browser: Option<crate::browser::BrowserFetcher>,
132 ) -> Result<Self> {
133 let fetcher = crate::browser::EscalatingFetcher::new(&source, browser)?;
134 Ok(Self::with_fetcher(source, Arc::new(fetcher)))
135 }
136
137 pub fn source(&self) -> &BookSource {
139 &self.source
140 }
141
142 pub fn source_url(&self) -> &str {
144 &self.source.url
145 }
146
147 fn base_vars(&self) -> Vars {
148 let mut v = Vars::new();
149 v.insert(
150 "base".into(),
151 self.source.url.trim_end_matches('/').to_string(),
152 );
153 v
154 }
155
156 fn get_req(&self, url: impl Into<String>) -> FetchRequest {
158 let mut req = FetchRequest::get(url);
159 let url = req.url.clone();
160 self.apply_auth(&url, &mut req.headers);
161 req
162 }
163
164 fn request_domain(&self, url: &str) -> String {
166 request_registrable_domain(url, ®istrable_domain(&self.source.url))
167 }
168
169 fn apply_auth(&self, url: &str, headers: &mut HashMap<String, String>) {
174 let source_domain = registrable_domain(&self.source.url);
175 let domain = request_registrable_domain(url, &source_domain);
176 let jar_cookie = self
177 .cookies
178 .read()
179 .ok()
180 .and_then(|j| j.cookie_header(&domain));
181 merge_login_into_headers(
182 &self.login_header,
183 &source_domain,
184 &domain,
185 jar_cookie.as_deref(),
186 headers,
187 );
188 }
189
190 async fn run_request(&self, req: FetchRequest) -> Result<String> {
193 let domain = self.request_domain(&req.url);
194 let resp = self.fetcher.fetch_full(req).await?;
195 if self.source.enabled_cookie_jar
196 && let Some(set_cookie) = resp.headers.get("set-cookie")
197 && let Ok(mut jar) = self.cookies.write()
198 {
199 jar.absorb_set_cookie(&domain, set_cookie);
200 }
201 self.check_login(&resp.body)?;
202 Ok(resp.body)
203 }
204
205 async fn fetch_checked(&self, url: impl Into<String>) -> Result<String> {
207 self.run_request(self.get_req(url)).await
208 }
209
210 fn check_login(&self, response: &str) -> Result<()> {
214 let js = self.source.login_check_js.trim();
215 if js.is_empty() {
216 return Ok(());
217 }
218 #[cfg(feature = "js")]
219 {
220 let vars = self.base_vars();
221 let verdict = eval_value(&Rule::Js { js: js.to_string() }, response, &vars)?;
222 if matches!(verdict.trim(), "" | "false" | "0") {
223 return Err(BookSourceError::LoginExpired);
224 }
225 }
226 let _ = response;
227 Ok(())
228 }
229
230 pub async fn warmup(&self) {
234 for u in &self.source.http.warmup {
235 let _ = self.run_request(self.get_req(u.clone())).await;
236 }
237 }
238
239 pub async fn book_info(&self, book_url: &str) -> Result<BookInfo> {
241 let mut chapter = self.base_vars();
242 self.run_prelude(&self.source.book_info.prelude, &mut chapter)
243 .await?;
244 let html = self.fetch_checked(book_url).await?;
245 let rules = self.source.book_info.as_book_rules();
246 self.eval_book_info(&rules, &html, &self.flatten(&chapter))
247 }
248
249 pub async fn toc(&self, toc_url: &str) -> Result<Toc> {
251 let toc = &self.source.toc;
252 let mut chapter = self.base_vars();
253 self.run_prelude(&toc.prelude, &mut chapter).await?;
254 let vars = self.flatten(&chapter);
255 let pages = self
256 .fetch_pages(toc_url, toc.next_page.as_ref(), toc.max_pages, &vars)
257 .await?;
258
259 let mut chapters: Vec<Chapter> = Vec::new();
260 let mut volumes: Vec<Volume> = Vec::new();
261 for page in &pages {
262 for item in eval_list(&toc.list, page)? {
263 let title = eval_value(&toc.name, &item, &vars)?;
264 let is_volume = match &toc.is_volume {
265 Some(r) => !eval_value(r, &item, &vars)?.trim().is_empty(),
266 None => false,
267 };
268 if is_volume {
269 volumes.push(Volume {
270 title,
271 first_chapter_index: chapters.len(),
272 });
273 } else {
274 let url = eval_value(&toc.url, &item, &vars)?;
275 chapters.push(Chapter {
276 title,
277 url,
278 is_volume: false,
279 });
280 }
281 }
282 }
283 Ok(Toc { chapters, volumes })
284 }
285
286 pub async fn content(&self, chapter_url: &str) -> Result<String> {
288 let c = &self.source.content;
289 let mut chapter = self.base_vars();
290 self.run_prelude(&c.prelude, &mut chapter).await?;
291 let vars = self.flatten(&chapter);
292 let pages = self
293 .fetch_pages(chapter_url, c.next_page.as_ref(), c.max_pages, &vars)
294 .await?;
295 let mut parts = Vec::with_capacity(pages.len());
296 for page in &pages {
297 parts.push(eval_value(&c.value, page, &vars)?);
298 }
299 Ok(parts.join("\n"))
300 }
301
302 pub async fn search(&self, key: &str, page: u32, page_size: u32) -> Result<Vec<BookListItem>> {
304 let op = self
305 .source
306 .search
307 .as_ref()
308 .ok_or(BookSourceError::Missing("search"))?;
309 let mut chapter = self.base_vars();
310 chapter.insert("key".into(), key.to_string());
311 chapter.insert("page".into(), page.to_string());
312 chapter.insert("pageSize".into(), page_size.to_string());
313 self.run_prelude(&op.prelude, &mut chapter).await?;
314
315 let vars = self.flatten(&chapter);
316 let html = self
317 .send_templated(
318 &op.request.url,
319 op.request.method,
320 op.request.body.as_ref(),
321 &op.request.headers,
322 &vars,
323 )
324 .await?;
325 let flat = self.flatten(&chapter);
329 for (name, rule) in &op.request.vars {
330 let v = eval_value(rule, &html, &flat)?;
331 if !v.is_empty() {
332 chapter.insert(name.clone(), v);
333 }
334 }
335 self.eval_list_items(&op.list, &op.item, &html, &self.flatten(&chapter))
336 }
337
338 pub async fn explore(
340 &self,
341 category_url: &UrlOrRule,
342 page: u32,
343 page_size: u32,
344 ) -> Result<Vec<BookListItem>> {
345 let op = self
346 .source
347 .explore
348 .as_ref()
349 .ok_or(BookSourceError::Missing("explore"))?;
350 let mut chapter = self.base_vars();
351 chapter.insert("page".into(), page.to_string());
352 chapter.insert("pageSize".into(), page_size.to_string());
353 self.run_prelude(&op.prelude, &mut chapter).await?;
354 let vars = self.flatten(&chapter);
355 let url = self.resolve_url(category_url, &vars)?;
356 let html = self.fetch_checked(url).await?;
357 self.eval_list_items(&op.list, &op.item, &html, &vars)
358 }
359
360 pub fn explore_categories(&self) -> Vec<Category> {
362 self.source
363 .explore
364 .as_ref()
365 .map(|e| e.categories.clone())
366 .unwrap_or_default()
367 }
368
369 fn flatten(&self, chapter: &Vars) -> Vars {
374 let mut out = Vars::new();
375 if let Ok(g) = self.source_vars.read() {
376 out.extend(g.iter().map(|(k, v)| (k.clone(), v.clone())));
377 }
378 if let Ok(g) = self.book_vars.read() {
379 out.extend(g.iter().map(|(k, v)| (k.clone(), v.clone())));
380 }
381 out.extend(chapter.iter().map(|(k, v)| (k.clone(), v.clone())));
382 out
383 }
384
385 async fn run_prelude(&self, steps: &[PreStep], chapter: &mut Vars) -> Result<()> {
389 for step in steps {
390 if !step.skip_if_present.is_empty() {
392 let flat = self.flatten(chapter);
393 if step
394 .skip_if_present
395 .iter()
396 .all(|k| flat.get(k).is_some_and(|v| !v.is_empty()))
397 {
398 continue;
399 }
400 }
401 let flat = self.flatten(chapter);
402 let resp = self
403 .send_templated(
404 &step.url,
405 step.method,
406 step.body.as_ref(),
407 &step.headers,
408 &flat,
409 )
410 .await?;
411 self.capture_into(&step.capture, &resp, chapter)?;
412 }
413 Ok(())
414 }
415
416 async fn send_templated(
422 &self,
423 url: &UrlOrRule,
424 method: Method,
425 body: Option<&UrlOrRule>,
426 headers: &HashMap<String, String>,
427 vars: &Vars,
428 ) -> Result<String> {
429 let url = self.resolve_url(url, vars)?;
430 let body = match body {
431 Some(b) => Some(self.resolve_url(b, vars)?),
432 None => None,
433 };
434 let mut hdrs = HashMap::with_capacity(headers.len());
435 for (k, v) in headers {
436 hdrs.insert(k.clone(), interpolate(v, vars));
437 }
438 self.apply_auth(&url, &mut hdrs);
439 self.run_request(FetchRequest {
440 url,
441 method,
442 body,
443 headers: hdrs,
444 })
445 .await
446 }
447
448 fn capture_into(&self, caps: &[Capture], body: &str, chapter: &mut Vars) -> Result<()> {
450 for cap in caps {
451 let v = eval_value(&cap.value, body, &self.flatten(chapter))?;
452 if v.is_empty() {
453 continue;
454 }
455 match cap.scope {
456 VarScope::Chapter => {
457 chapter.insert(cap.name.clone(), v);
458 }
459 VarScope::Book => {
460 if let Ok(mut g) = self.book_vars.write() {
461 g.insert(cap.name.clone(), v);
462 }
463 }
464 VarScope::Source => {
465 if let Ok(mut g) = self.source_vars.write() {
466 g.insert(cap.name.clone(), v);
467 }
468 }
469 }
470 }
471 Ok(())
472 }
473
474 async fn fetch_pages(
476 &self,
477 start: &str,
478 next_page: Option<&Rule>,
479 max_pages: u32,
480 vars: &Vars,
481 ) -> Result<Vec<String>> {
482 let mut pages = Vec::new();
483 let mut url = start.to_string();
484 for _ in 0..max_pages.max(1) {
485 let html = self.fetch_checked(url.clone()).await?;
486 let next = match next_page {
487 Some(r) => eval_value(r, &html, vars)?,
488 None => String::new(),
489 };
490 pages.push(html);
491 if next.trim().is_empty() {
492 break;
493 }
494 url = next;
495 }
496 Ok(pages)
497 }
498
499 fn eval_list_items(
500 &self,
501 list: &Rule,
502 item: &BookRules,
503 html: &str,
504 vars: &Vars,
505 ) -> Result<Vec<BookListItem>> {
506 let mut out = Vec::new();
507 for ctx in eval_list(list, html)? {
508 let info = self.eval_book_info(item, &ctx, vars)?;
509 let book_url = opt_eval(item.book_url.as_ref(), &ctx, vars)?;
510 out.push(BookListItem { info, book_url });
511 }
512 Ok(out)
513 }
514
515 fn eval_book_info(&self, r: &BookRules, ctx: &str, vars: &Vars) -> Result<BookInfo> {
516 Ok(BookInfo {
517 name: opt_eval(r.name.as_ref(), ctx, vars)?,
518 author: opt_eval(r.author.as_ref(), ctx, vars)?,
519 cover: opt_eval(r.cover.as_ref(), ctx, vars)?,
520 intro: opt_eval(r.intro.as_ref(), ctx, vars)?,
521 kind: opt_eval(r.kind.as_ref(), ctx, vars)?,
522 last_chapter: opt_eval(r.last_chapter.as_ref(), ctx, vars)?,
523 toc_url: opt_eval(r.toc_url.as_ref(), ctx, vars)?,
524 word_count: opt_eval(r.word_count.as_ref(), ctx, vars)?,
525 })
526 }
527
528 fn resolve_url(&self, u: &UrlOrRule, vars: &Vars) -> Result<String> {
529 Ok(match u {
530 UrlOrRule::Str(s) => eval_value(
532 &Rule::Template {
533 template: s.clone(),
534 },
535 "",
536 vars,
537 )?,
538 UrlOrRule::Rule(r) => eval_value(r, "", vars)?,
539 })
540 }
541}
542
543fn opt_eval(rule: Option<&Rule>, ctx: &str, vars: &Vars) -> Result<String> {
545 Ok(match rule {
546 Some(r) => eval_value(r, ctx, vars)?,
547 None => String::new(),
548 })
549}
550
551#[cfg(test)]
552mod tests {
553 use super::*;
554 use crate::error::FetchError;
555 use crate::fetch::{FetchResponse, Fetcher};
556 use async_trait::async_trait;
557
558 use std::sync::Mutex;
559
560 struct MockFetcher(String);
562
563 #[async_trait]
564 impl Fetcher for MockFetcher {
565 async fn fetch(&self, _req: FetchRequest) -> std::result::Result<String, FetchError> {
566 Ok(self.0.clone())
567 }
568 }
569
570 struct RecordingFetcher {
572 body: String,
573 last_headers: Arc<Mutex<HashMap<String, String>>>,
574 }
575
576 #[async_trait]
577 impl Fetcher for RecordingFetcher {
578 async fn fetch(&self, req: FetchRequest) -> std::result::Result<String, FetchError> {
579 *self.last_headers.lock().unwrap() = req.headers;
580 Ok(self.body.clone())
581 }
582 }
583
584 struct CookieEchoFetcher {
586 set_cookie: String,
587 last_cookie: Arc<Mutex<Option<String>>>,
588 }
589
590 #[async_trait]
591 impl Fetcher for CookieEchoFetcher {
592 async fn fetch(&self, req: FetchRequest) -> std::result::Result<String, FetchError> {
593 self.fetch_full(req).await.map(|r| r.body)
594 }
595 async fn fetch_full(
596 &self,
597 req: FetchRequest,
598 ) -> std::result::Result<FetchResponse, FetchError> {
599 *self.last_cookie.lock().unwrap() = req.headers.get("Cookie").cloned();
600 let mut headers = HashMap::new();
601 headers.insert("set-cookie".to_string(), self.set_cookie.clone());
602 Ok(FetchResponse {
603 body: CATALOG.to_string(),
604 status: 200,
605 headers,
606 })
607 }
608 }
609
    /// Catalogue page fixture: one `.box` container holding a heading wrapped
    /// in `#shuqian`, two direct-child volume headings and three chapter links.
    const CATALOG: &str = r#"<html><body><div class="box">
      <span id="shuqian"><h2 class="module-title type">阅读进度</h2></span>
      <h2 class="module-title type">第一卷</h2>
      <div class="module-row-info"><a class="module-row-text" href="/n/1.html"><div class="module-row-title"><span>第一章</span></div></a></div>
      <div class="module-row-info"><a class="module-row-text" href="/n/2.html"><div class="module-row-title"><span>第二章</span></div></a></div>
      <h2 class="module-title type">第二卷</h2>
      <div class="module-row-info"><a class="module-row-text" href="/n/3.html"><div class="module-row-title"><span>第三章</span></div></a></div>
    </div></body></html>"#;
618
    /// Minimal book-source JSON fixture with base URL `https://x`, an empty
    /// `bookInfo`, and a single-page `toc` whose list rule selects both the
    /// direct-child `h2` headings and the chapter links of `CATALOG`.
    const SOURCE: &str = r#"{
      "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
      "bookInfo":{},
      "toc":{
        "list":{"via":"css","select":".box > h2.module-title.type, .box a.module-row-text"},
        "name":{"firstOf":[{"via":"css","select":".module-row-title","extract":"text"},{"via":"css","select":"h2","extract":"text"}]},
        "url":{"via":"css","select":"a","extract":{"attr":"href"}},
        "isVolume":{"via":"css","select":"h2","extract":"text"},
        "maxPages":1
      },
      "content":{"value":{"via":"css","select":".article-content","extract":"text"}}
    }"#;
631
632 #[tokio::test]
633 async fn engine_toc_splits_volumes_offline() {
634 let src = BookSource::from_json(SOURCE).unwrap();
635 let engine = Engine::with_fetcher(src, Arc::new(MockFetcher(CATALOG.to_string())));
636 let toc = engine.toc("/any").await.unwrap();
637 assert_eq!(toc.volumes.len(), 2, "应识别 2 卷");
638 assert_eq!(toc.chapters.len(), 3, "应识别 3 章");
639 assert_eq!(toc.chapters[0].title, "第一章");
640 assert_eq!(toc.chapters[0].url, "/n/1.html");
641 assert_eq!(toc.volumes[1].first_chapter_index, 2);
642 }
643
644 #[tokio::test]
646 async fn engine_merges_login_header_into_requests() {
647 let src = BookSource::from_json(SOURCE).unwrap();
648 let captured = Arc::new(Mutex::new(HashMap::new()));
649 let fetcher = Arc::new(RecordingFetcher {
650 body: CATALOG.to_string(),
651 last_headers: captured.clone(),
652 });
653 let mut lh = BTreeMap::new();
654 lh.insert("Authorization".into(), "Bearer T".into());
655 lh.insert("Cookie".into(), "sid=1".into());
656 let engine = Engine::with_fetcher(src, fetcher).with_login_header(lh);
657
658 engine.toc("/any").await.unwrap();
660 let h = captured.lock().unwrap();
661 assert_eq!(
662 h.get("Authorization").map(String::as_str),
663 Some("Bearer T"),
664 "JWT 应每请求携带"
665 );
666 assert_eq!(
667 h.get("Cookie").map(String::as_str),
668 Some("sid=1"),
669 "Cookie 走同一注入路径"
670 );
671 }
672
673 #[tokio::test]
675 async fn login_header_not_sent_to_other_registrable_domain() {
676 let src = BookSource::from_json(SOURCE).unwrap(); let captured = Arc::new(Mutex::new(HashMap::new()));
678 let fetcher = Arc::new(RecordingFetcher {
679 body: CATALOG.to_string(),
680 last_headers: captured.clone(),
681 });
682 let mut lh = BTreeMap::new();
683 lh.insert("Authorization".into(), "Bearer T".into());
684 lh.insert("Cookie".into(), "sid=1".into());
685 let engine = Engine::with_fetcher(src, fetcher).with_login_header(lh);
686 engine.toc("https://evil.example.org/any").await.unwrap();
688 let h = captured.lock().unwrap();
689 assert!(
690 h.get("Authorization").is_none(),
691 "跨注册域不应携带登录头: {h:?}"
692 );
693 assert!(
694 h.get("Cookie").is_none(),
695 "跨注册域不应携带登录 Cookie: {h:?}"
696 );
697 }
698
699 #[tokio::test]
701 async fn newline_in_login_header_sanitized_in_engine_requests() {
702 let src = BookSource::from_json(SOURCE).unwrap();
703 let captured = Arc::new(Mutex::new(HashMap::new()));
704 let fetcher = Arc::new(RecordingFetcher {
705 body: CATALOG.to_string(),
706 last_headers: captured.clone(),
707 });
708 let mut lh = BTreeMap::new();
709 lh.insert("Cookie".into(), "a=1\nb=2".into());
711 let engine = Engine::with_fetcher(src, fetcher).with_login_header(lh);
712 engine.toc("/any").await.unwrap();
713 let h = captured.lock().unwrap();
714 let cookie = h.get("Cookie").cloned().unwrap_or_default();
715 assert!(!cookie.contains('\n'), "Cookie 的 \\n 应被剥除: {cookie:?}");
716 assert_eq!(cookie, "a=1b=2", "与 host 侧 sanitize 行为对称");
717 }
718
719 #[tokio::test]
721 async fn engine_without_login_header_adds_nothing() {
722 let src = BookSource::from_json(SOURCE).unwrap();
723 let captured = Arc::new(Mutex::new(HashMap::new()));
724 let fetcher = Arc::new(RecordingFetcher {
725 body: CATALOG.to_string(),
726 last_headers: captured.clone(),
727 });
728 let engine = Engine::with_fetcher(src, fetcher);
729 engine.toc("/any").await.unwrap();
730 assert!(captured.lock().unwrap().is_empty(), "未登录不应注入额外头");
731 }
732
733 #[cfg(feature = "js")]
735 #[tokio::test]
736 async fn login_check_js_detects_expired() {
737 let json = SOURCE.replacen(
738 "\"bookInfo\":{}",
739 "\"loginCheckJs\":\"result.indexOf('未登录')<0\",\"bookInfo\":{}",
740 1,
741 );
742 let src = BookSource::from_json(&json).unwrap();
743 let bad = Engine::with_fetcher(
745 src.clone(),
746 Arc::new(MockFetcher("<html>未登录</html>".into())),
747 );
748 let err = bad.toc("/any").await.unwrap_err();
749 assert!(err.is_login_expired(), "应判登录失效: {err}");
750 let ok = Engine::with_fetcher(src, Arc::new(MockFetcher(CATALOG.to_string())));
752 assert!(ok.toc("/any").await.is_ok(), "正常响应不应判失效");
753 }
754
755 #[tokio::test]
757 async fn enabled_cookie_jar_absorbs_resends_and_persists() {
758 let json = SOURCE.replacen(
759 "\"bookInfo\":{}",
760 "\"enabledCookieJar\":true,\"bookInfo\":{}",
761 1,
762 );
763 let src = BookSource::from_json(&json).unwrap();
764 let last = Arc::new(Mutex::new(None));
765 let fetcher = Arc::new(CookieEchoFetcher {
766 set_cookie: "token=xyz; Max-Age=3600; Path=/".to_string(),
767 last_cookie: last.clone(),
768 });
769 let engine = Engine::with_fetcher(src, fetcher);
770
771 engine.toc("/p1").await.unwrap();
773 assert!(last.lock().unwrap().is_none(), "首请求不应带 cookie");
774 engine.book_info("/p2").await.unwrap();
776 assert_eq!(
777 last.lock().unwrap().clone(),
778 Some("token=xyz".to_string()),
779 "回灌 cookie 应随后续请求发出"
780 );
781 assert_eq!(
784 engine.persistent_cookies().get("x").map(String::as_str),
785 Some("token=xyz")
786 );
787 }
788
789 #[tokio::test]
791 async fn warmup_absorbs_set_cookie_into_jar() {
792 let json = SOURCE.replacen(
793 "\"bookInfo\":{}",
794 "\"enabledCookieJar\":true,\"http\":{\"warmup\":[\"https://x/warm\"]},\"bookInfo\":{}",
795 1,
796 );
797 let src = BookSource::from_json(&json).unwrap();
798 let last = Arc::new(Mutex::new(None));
799 let fetcher = Arc::new(CookieEchoFetcher {
800 set_cookie: "token=warm; Max-Age=3600; Path=/".to_string(),
801 last_cookie: last.clone(),
802 });
803 let engine = Engine::with_fetcher(src, fetcher);
804 engine.warmup().await;
805 assert_eq!(
807 engine.persistent_cookies().get("x").map(String::as_str),
808 Some("token=warm"),
809 "预热页的 Set-Cookie 应回灌引擎 cookie 库"
810 );
811 }
812
813 #[tokio::test]
815 async fn cookie_jar_disabled_does_not_absorb() {
816 let src = BookSource::from_json(SOURCE).unwrap();
817 let last = Arc::new(Mutex::new(None));
818 let fetcher = Arc::new(CookieEchoFetcher {
819 set_cookie: "token=xyz; Max-Age=3600".to_string(),
820 last_cookie: last.clone(),
821 });
822 let engine = Engine::with_fetcher(src, fetcher);
823 engine.toc("/p1").await.unwrap();
824 engine.book_info("/p2").await.unwrap();
825 assert!(
826 last.lock().unwrap().is_none(),
827 "未开 cookieJar 不应回灌/再发"
828 );
829 assert!(engine.persistent_cookies().is_empty());
830 }
831
832 struct ScriptedFetcher {
836 routes: Vec<(String, String)>,
837 calls: Arc<Mutex<Vec<String>>>,
838 }
839
840 #[async_trait]
841 impl Fetcher for ScriptedFetcher {
842 async fn fetch(&self, req: FetchRequest) -> std::result::Result<String, FetchError> {
843 self.calls.lock().unwrap().push(req.url.clone());
844 for (pat, body) in &self.routes {
845 if req.url.contains(pat.as_str()) {
846 return Ok(body.clone());
847 }
848 }
849 Ok(String::new())
850 }
851 }
852
853 fn scripted(routes: Vec<(&str, &str)>) -> (Arc<ScriptedFetcher>, Arc<Mutex<Vec<String>>>) {
854 let calls = Arc::new(Mutex::new(Vec::new()));
855 let f = Arc::new(ScriptedFetcher {
856 routes: routes
857 .into_iter()
858 .map(|(a, b)| (a.to_string(), b.to_string()))
859 .collect(),
860 calls: calls.clone(),
861 });
862 (f, calls)
863 }
864
865 #[tokio::test]
867 async fn prelude_captures_token_into_main_request() {
868 let json = r#"{
869 "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
870 "search":{
871 "prelude":[{"url":{"template":"{{base}}/prepare"},
872 "capture":[{"name":"token","value":{"via":"raw","clean":[{"trim":true}]},"scope":"chapter"}]}],
873 "request":{"url":{"template":"{{base}}/search?kw={{key}}&token={{token}}"}},
874 "list":{"via":"css","select":".item"},
875 "item":{"name":{"via":"css","select":".t","extract":"text"}}
876 },
877 "bookInfo":{},
878 "toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
879 "content":{"value":{"via":"css","select":".c"}}
880 }"#;
881 let src = BookSource::from_json(json).unwrap();
882 let (f, calls) = scripted(vec![
883 ("/prepare", "ABC"),
884 (
885 "/search",
886 r#"<div class="item"><span class="t">书名</span></div>"#,
887 ),
888 ]);
889 let engine = Engine::with_fetcher(src, f);
890 let items = engine.search("k", 1, 20).await.unwrap();
891 assert_eq!(items.len(), 1);
892 assert_eq!(items[0].info.name, "书名");
893 let c = calls.lock().unwrap();
894 assert!(
895 c.iter().any(|u| u.contains("/prepare")),
896 "应先跑前置 prepare: {c:?}"
897 );
898 assert!(
899 c.iter().any(|u| u.contains("token=ABC")),
900 "主搜索应带捕获的 token: {c:?}"
901 );
902 }
903
904 #[tokio::test]
906 async fn skip_if_present_reuses_source_scope_token() {
907 let json = r#"{
908 "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
909 "search":{
910 "prelude":[{"url":{"template":"{{base}}/prepare"},
911 "capture":[{"name":"token","value":{"via":"raw","clean":[{"trim":true}]},"scope":"source"}],
912 "skipIfPresent":["token"]}],
913 "request":{"url":{"template":"{{base}}/search?token={{token}}"}},
914 "list":{"via":"css","select":".item"},
915 "item":{"name":{"via":"css","select":".t","extract":"text"}}
916 },
917 "bookInfo":{},
918 "toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
919 "content":{"value":{"via":"css","select":".c"}}
920 }"#;
921 let src = BookSource::from_json(json).unwrap();
922 let (f, calls) = scripted(vec![
923 ("/prepare", "TKN"),
924 (
925 "/search",
926 r#"<div class="item"><span class="t">x</span></div>"#,
927 ),
928 ]);
929 let engine = Engine::with_fetcher(src, f);
930 engine.search("a", 1, 20).await.unwrap();
931 engine.search("b", 1, 20).await.unwrap();
932 let prepares = calls
933 .lock()
934 .unwrap()
935 .iter()
936 .filter(|u| u.contains("/prepare"))
937 .count();
938 assert_eq!(
939 prepares, 1,
940 "skipIfPresent 应使 source 级 token 复用,prepare 只跑一次"
941 );
942 assert_eq!(
943 engine.source_vars().get("token").map(String::as_str),
944 Some("TKN")
945 );
946 }
947
948 #[tokio::test]
950 async fn request_vars_visible_to_list_items() {
951 let json = r#"{
952 "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
953 "search":{
954 "request":{"url":{"template":"{{base}}/s"},
955 "vars":{"site":{"via":"css","select":".site","extract":"text"}}},
956 "list":{"via":"css","select":".item"},
957 "item":{"name":{"template":"{{site}}-书"}}
958 },
959 "bookInfo":{},
960 "toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
961 "content":{"value":{"via":"css","select":".c"}}
962 }"#;
963 let src = BookSource::from_json(json).unwrap();
964 let html = r#"<span class="site">甲站</span><div class="item">x</div>"#;
965 let engine = Engine::with_fetcher(src, Arc::new(MockFetcher(html.to_string())));
966 let items = engine.search("k", 1, 20).await.unwrap();
967 assert_eq!(items.len(), 1);
968 assert_eq!(
969 items[0].info.name, "甲站-书",
970 "item 模板应看到主请求捕获的 site"
971 );
972 }
973
974 #[tokio::test]
976 async fn empty_capture_not_written() {
977 let json = r#"{
978 "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
979 "search":{
980 "prelude":[{"url":{"template":"{{base}}/p"},
981 "capture":[{"name":"x","value":{"via":"css","select":".nope","extract":"text"},"scope":"source"}]}],
982 "request":{"url":{"template":"{{base}}/s?x={{x}}"}},
983 "list":{"via":"css","select":".item"},
984 "item":{"name":{"via":"css","select":".t","extract":"text"}}
985 },
986 "bookInfo":{},
987 "toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
988 "content":{"value":{"via":"css","select":".c"}}
989 }"#;
990 let src = BookSource::from_json(json).unwrap();
991 let (f, calls) = scripted(vec![
992 ("/p", "<html></html>"),
993 ("/s", r#"<div class="item"><span class="t">y</span></div>"#),
994 ]);
995 let engine = Engine::with_fetcher(src, f);
996 engine.search("k", 1, 20).await.unwrap();
997 assert!(
998 !engine.source_vars().contains_key("x"),
999 "空串捕获不应写作用域层"
1000 );
1001 assert!(
1002 calls.lock().unwrap().iter().any(|u| u.contains("/s?x=")),
1003 "主请求应照常发出(x 为空串)"
1004 );
1005 }
1006
1007 #[tokio::test]
1009 async fn toc_prelude_csrf_visible_to_extraction() {
1010 let json = r#"{
1011 "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
1012 "bookInfo":{},
1013 "toc":{
1014 "prelude":[{"url":{"template":"{{base}}/prepare"},
1015 "capture":[{"name":"csrf","value":{"via":"raw","clean":[{"trim":true}]},"scope":"chapter"}]}],
1016 "list":{"via":"css","select":".ch"},
1017 "name":{"via":"css","select":"a","extract":"text"},
1018 "url":{"concat":[{"literal":"/c?sign="},{"template":"{{csrf}}"},{"literal":"&href="},{"via":"css","select":"a","extract":{"attr":"href"}}]},
1019 "maxPages":1
1020 },
1021 "content":{"value":{"via":"css","select":".c"}}
1022 }"#;
1023 let src = BookSource::from_json(json).unwrap();
1024 let (f, _calls) = scripted(vec![
1025 ("/prepare", "SIG"),
1026 (
1027 "/toc",
1028 r#"<div class="ch"><a href="/n/1.html">第一章</a></div>"#,
1029 ),
1030 ]);
1031 let engine = Engine::with_fetcher(src, f);
1032 let toc = engine.toc("/toc/1").await.unwrap();
1033 assert_eq!(toc.chapters.len(), 1);
1034 assert_eq!(
1035 toc.chapters[0].url, "/c?sign=SIG&href=/n/1.html",
1036 "目录 url 应拼入前置捕获的 csrf"
1037 );
1038 }
1039
1040 #[tokio::test]
1042 async fn main_request_headers_interpolate_captured_vars() {
1043 let json = r#"{
1044 "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
1045 "search":{
1046 "prelude":[{"url":{"template":"{{base}}/prepare"},
1047 "capture":[{"name":"token","value":{"via":"raw","clean":[{"trim":true}]},"scope":"chapter"}]}],
1048 "request":{"url":{"template":"{{base}}/search"},
1049 "headers":{"Authorization":"Bearer {{token}}"}},
1050 "list":{"via":"css","select":".item"},
1051 "item":{"name":{"via":"css","select":".t","extract":"text"}}
1052 },
1053 "bookInfo":{},
1054 "toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
1055 "content":{"value":{"via":"css","select":".c"}}
1056 }"#;
1057 let src = BookSource::from_json(json).unwrap();
1058 let seen = Arc::new(Mutex::new(None));
1059 struct HeaderProbe {
1060 seen: Arc<Mutex<Option<String>>>,
1061 }
1062 #[async_trait]
1063 impl Fetcher for HeaderProbe {
1064 async fn fetch(&self, req: FetchRequest) -> std::result::Result<String, FetchError> {
1065 if req.url.contains("/search") {
1066 *self.seen.lock().unwrap() = req.headers.get("Authorization").cloned();
1067 return Ok(r#"<div class="item"><span class="t">书</span></div>"#.to_string());
1068 }
1069 Ok("ABC".to_string()) }
1071 }
1072 let engine = Engine::with_fetcher(src, Arc::new(HeaderProbe { seen: seen.clone() }));
1073 engine.search("k", 1, 20).await.unwrap();
1074 assert_eq!(
1075 seen.lock().unwrap().clone(),
1076 Some("Bearer ABC".to_string()),
1077 "主请求 header 应插值前置捕获的 token"
1078 );
1079 }
1080
1081 #[tokio::test]
1083 async fn multiple_request_vars_all_captured() {
1084 let json = r#"{
1085 "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
1086 "search":{
1087 "request":{"url":{"template":"{{base}}/s"},
1088 "vars":{
1089 "a":{"via":"css","select":".a","extract":"text"},
1090 "b":{"via":"css","select":".b","extract":"text"}
1091 }},
1092 "list":{"via":"css","select":".item"},
1093 "item":{"name":{"template":"{{a}}-{{b}}"}}
1094 },
1095 "bookInfo":{},
1096 "toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
1097 "content":{"value":{"via":"css","select":".c"}}
1098 }"#;
1099 let src = BookSource::from_json(json).unwrap();
1100 let html = r#"<span class="a">甲</span><span class="b">乙</span><div class="item">x</div>"#;
1101 let engine = Engine::with_fetcher(src, Arc::new(MockFetcher(html.to_string())));
1102 let items = engine.search("k", 1, 20).await.unwrap();
1103 assert_eq!(
1104 items[0].info.name, "甲-乙",
1105 "多条 request.vars 应都被捕获且对 item 可见"
1106 );
1107 }
1108}