use super::cookie::{
CookieJar, merge_login_into_headers, registrable_domain, request_registrable_domain,
};
use super::error::{BookSourceError, Result};
use super::eval::{Vars, eval_list, eval_value, interpolate};
use super::fetch::{FetchRequest, Fetcher, ReqwestFetcher};
use super::model::{BookInfo, BookListItem, Chapter, Toc, Volume};
use super::source::{
BookRules, BookSource, Capture, Category, Method, PreStep, Rule, UrlOrRule, VarScope,
};
use std::collections::{BTreeMap, HashMap};
use std::sync::{Arc, RwLock};
/// Runs the rules of one [`BookSource`]: issues requests through a [`Fetcher`]
/// and evaluates the source's rules against the responses.
///
/// `Clone` is shallow for the `Arc`-wrapped fields, so clones share the same
/// cookie jar and the same source-/book-scoped variable maps.
#[derive(Clone)]
pub struct Engine {
// The source definition (rules, URLs, feature flags) this engine executes.
source: Arc<BookSource>,
// Transport used for every request the engine issues.
fetcher: Arc<dyn Fetcher>,
// Login headers handed to `merge_login_into_headers` when a request is built.
login_header: BTreeMap<String, String>,
// Cookie jar read when building requests; fed from `set-cookie` responses
// only when the source sets `enabled_cookie_jar`.
cookies: Arc<RwLock<CookieJar>>,
// Variables captured with `VarScope::Source`.
source_vars: Arc<RwLock<BTreeMap<String, String>>>,
// Variables captured with `VarScope::Book`.
book_vars: Arc<RwLock<BTreeMap<String, String>>>,
}
/// Compact `Debug` output: only the source name is printed; every other field
/// is elided and the struct is rendered as non-exhaustive.
impl std::fmt::Debug for Engine {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("Engine");
        builder.field("source", &self.source.name);
        builder.finish_non_exhaustive()
    }
}
impl Engine {
pub fn new(source: BookSource) -> Result<Self> {
let fetcher = Arc::new(ReqwestFetcher::new(&source)?);
Ok(Self::with_fetcher(source, fetcher))
}
/// Builds an engine around a caller-supplied [`Fetcher`].
///
/// The engine starts with no login headers, a default cookie jar and empty
/// source-/book-scoped variable maps.
pub fn with_fetcher(source: BookSource, fetcher: Arc<dyn Fetcher>) -> Self {
    // Each call yields a fresh, independently locked, empty variable scope.
    let empty_scope = || Arc::new(RwLock::new(BTreeMap::<String, String>::new()));
    let cookies = Arc::new(RwLock::new(CookieJar::default()));
    Self {
        source: Arc::new(source),
        fetcher,
        login_header: BTreeMap::new(),
        cookies,
        source_vars: empty_scope(),
        book_vars: empty_scope(),
    }
}
/// Builder step: replaces the login headers that are merged into requests.
#[must_use]
pub fn with_login_header(self, login_header: BTreeMap<String, String>) -> Self {
    let mut engine = self;
    engine.login_header = login_header;
    engine
}
/// Builder step: replaces the cookie jar with one rebuilt from `persistent`
/// via `CookieJar::from_persistent`.
///
/// If the jar lock is poisoned the existing jar is left untouched.
#[must_use]
pub fn with_cookies(self, persistent: &BTreeMap<String, String>) -> Self {
    match self.cookies.write() {
        Ok(mut jar) => *jar = CookieJar::from_persistent(persistent),
        // Best-effort: a poisoned lock keeps whatever jar is already there.
        Err(_) => {}
    }
    self
}
/// Returns the jar's persistent form (`CookieJar::persistent`), or an empty
/// map when the jar lock is poisoned.
pub fn persistent_cookies(&self) -> BTreeMap<String, String> {
    match self.cookies.read() {
        Ok(jar) => jar.persistent(),
        Err(_) => BTreeMap::new(),
    }
}
/// Builder step: replaces the whole book-scoped variable map with `book_vars`.
///
/// If the lock is poisoned the existing map is left untouched.
#[must_use]
pub fn with_book_vars(self, book_vars: BTreeMap<String, String>) -> Self {
    match self.book_vars.write() {
        Ok(mut scope) => *scope = book_vars,
        // Best-effort: a poisoned lock keeps the current variables.
        Err(_) => {}
    }
    self
}
/// Builder step: merges `source_vars` into the source-scoped variable map.
///
/// Unlike `with_book_vars`, existing entries are kept; only keys present in
/// `source_vars` are inserted or overwritten. A poisoned lock is ignored.
#[must_use]
pub fn with_source_vars(self, source_vars: &BTreeMap<String, String>) -> Self {
    if let Ok(mut scope) = self.source_vars.write() {
        let copies = source_vars
            .iter()
            .map(|(name, value)| (name.clone(), value.clone()));
        scope.extend(copies);
    }
    self
}
/// Snapshot of the book-scoped variables; empty if the lock is poisoned.
pub fn book_vars(&self) -> BTreeMap<String, String> {
    match self.book_vars.read() {
        Ok(scope) => scope.clone(),
        Err(_) => BTreeMap::new(),
    }
}
/// Snapshot of the source-scoped variables; empty if the lock is poisoned.
pub fn source_vars(&self) -> BTreeMap<String, String> {
    match self.source_vars.read() {
        Ok(scope) => scope.clone(),
        Err(_) => BTreeMap::new(),
    }
}
/// Builds an engine whose transport is a `crate::browser::EscalatingFetcher`
/// constructed from `source` and the optional `browser`.
///
/// Only compiled with the `browser` feature.
///
/// # Errors
/// Propagates whatever `EscalatingFetcher::new` reports.
#[cfg(feature = "browser")]
pub fn with_browser_assist(
    source: BookSource,
    browser: Option<crate::browser::BrowserFetcher>,
) -> Result<Self> {
    let escalating: Arc<dyn Fetcher> =
        Arc::new(crate::browser::EscalatingFetcher::new(&source, browser)?);
    Ok(Self::with_fetcher(source, escalating))
}
/// Borrows the source definition this engine was built from.
pub fn source(&self) -> &BookSource {
    &*self.source
}
/// Borrows the source's `url` field exactly as stored (no trimming).
pub fn source_url(&self) -> &str {
    let url: &str = &self.source.url;
    url
}
/// Starts a fresh variable set containing only `base`: the source URL with
/// any trailing `/` characters removed.
fn base_vars(&self) -> Vars {
    let base = self.source.url.trim_end_matches('/').to_string();
    let mut vars = Vars::new();
    vars.insert("base".into(), base);
    vars
}
/// Builds a GET request for `url` and lets `apply_auth` add login/cookie
/// headers to it.
fn get_req(&self, url: impl Into<String>) -> FetchRequest {
    let mut req = FetchRequest::get(url);
    // `url` and `headers` are distinct fields, so they can be borrowed
    // shared/mutably at the same time; no copy of the URL is needed.
    self.apply_auth(&req.url, &mut req.headers);
    req
}
fn request_domain(&self, url: &str) -> String {
request_registrable_domain(url, ®istrable_domain(&self.source.url))
}
/// Adds authentication material for a request to `url` into `headers`.
///
/// Looks up the jar's cookie header for the request's registrable domain and
/// delegates the actual merging (login headers + jar cookie, given both the
/// source domain and the request domain) to `merge_login_into_headers`.
fn apply_auth(&self, url: &str, headers: &mut HashMap<String, String>) {
    let source_domain = registrable_domain(&self.source.url);
    let target_domain = request_registrable_domain(url, &source_domain);
    // A poisoned jar is treated the same as "no cookie for this domain".
    let jar_cookie = match self.cookies.read() {
        Ok(jar) => jar.cookie_header(&target_domain),
        Err(_) => None,
    };
    merge_login_into_headers(
        &self.login_header,
        &source_domain,
        &target_domain,
        jar_cookie.as_deref(),
        headers,
    );
}
/// Sends `req` through the fetcher and returns the response body.
///
/// When the source enables its cookie jar, a `set-cookie` response header is
/// absorbed into the jar under the request's domain. The body is then passed
/// through `check_login` before being returned.
///
/// # Errors
/// Fetch failures and `check_login` failures are propagated.
async fn run_request(&self, req: FetchRequest) -> Result<String> {
    // Resolve the domain first: `req` is consumed by the fetcher below.
    let domain = self.request_domain(&req.url);
    let response = self.fetcher.fetch_full(req).await?;
    if self.source.enabled_cookie_jar {
        if let Some(set_cookie) = response.headers.get("set-cookie") {
            // A poisoned jar silently skips absorption.
            if let Ok(mut jar) = self.cookies.write() {
                jar.absorb_set_cookie(&domain, set_cookie);
            }
        }
    }
    self.check_login(&response.body)?;
    Ok(response.body)
}
/// GETs `url` with auth headers applied and returns the login-checked body.
async fn fetch_checked(&self, url: impl Into<String>) -> Result<String> {
    let request = self.get_req(url);
    self.run_request(request).await
}
/// Runs the source's `login_check_js` (if any) against `response`.
///
/// With the `js` feature enabled, the script is evaluated as a `Rule::Js`
/// over the response; a trimmed verdict of `""`, `"false"` or `"0"` means the
/// login has expired. Without the feature, or with a blank script, the check
/// always passes.
///
/// # Errors
/// `BookSourceError::LoginExpired` on a negative verdict; evaluation errors
/// are propagated.
fn check_login(&self, response: &str) -> Result<()> {
    let script = self.source.login_check_js.trim();
    if script.is_empty() {
        return Ok(());
    }
    #[cfg(feature = "js")]
    {
        let rule = Rule::Js {
            js: script.to_string(),
        };
        let verdict = eval_value(&rule, response, &self.base_vars())?;
        let expired = matches!(verdict.trim(), "" | "false" | "0");
        if expired {
            return Err(BookSourceError::LoginExpired);
        }
    }
    // Keeps `response` used when the `js` feature is compiled out.
    let _ = response;
    Ok(())
}
/// Requests every URL listed in the source's `http.warmup`, in order.
///
/// Warm-up is best-effort: each request's result (including errors) is
/// discarded, though side effects of `run_request` such as cookie absorption
/// still happen.
pub async fn warmup(&self) {
    for warm_url in &self.source.http.warmup {
        let request = self.get_req(warm_url.clone());
        let _ = self.run_request(request).await;
    }
}
/// Fetches `book_url` and extracts the book's metadata.
///
/// Runs the `book_info` prelude first, then evaluates the book-info rules
/// against the fetched page with the flattened variable scopes.
///
/// # Errors
/// Prelude, fetch, login-check and rule-evaluation errors are propagated.
pub async fn book_info(&self, book_url: &str) -> Result<BookInfo> {
    let op = &self.source.book_info;
    let mut scope = self.base_vars();
    self.run_prelude(&op.prelude, &mut scope).await?;
    let html = self.fetch_checked(book_url).await?;
    let rules = op.as_book_rules();
    let vars = self.flatten(&scope);
    self.eval_book_info(&rules, &html, &vars)
}
/// Fetches the table of contents starting at `toc_url`.
///
/// After the TOC prelude, pages are collected with `fetch_pages` and every
/// list item on every page is classified: when the optional `is_volume` rule
/// yields non-blank text the item becomes a [`Volume`] (pointing at the index
/// the next chapter will get), otherwise it becomes a [`Chapter`].
///
/// # Errors
/// Prelude, fetch, login-check and rule-evaluation errors are propagated.
pub async fn toc(&self, toc_url: &str) -> Result<Toc> {
    let rules = &self.source.toc;
    let mut scope = self.base_vars();
    self.run_prelude(&rules.prelude, &mut scope).await?;
    let vars = self.flatten(&scope);
    let pages = self
        .fetch_pages(toc_url, rules.next_page.as_ref(), rules.max_pages, &vars)
        .await?;

    let mut chapters: Vec<Chapter> = Vec::new();
    let mut volumes: Vec<Volume> = Vec::new();
    for page in &pages {
        for item in eval_list(&rules.list, page)? {
            let title = eval_value(&rules.name, &item, &vars)?;
            let is_volume = if let Some(marker) = &rules.is_volume {
                !eval_value(marker, &item, &vars)?.trim().is_empty()
            } else {
                false
            };
            if !is_volume {
                // The URL rule is only evaluated for real chapters.
                let url = eval_value(&rules.url, &item, &vars)?;
                chapters.push(Chapter {
                    title,
                    url,
                    is_volume: false,
                });
                continue;
            }
            volumes.push(Volume {
                title,
                first_chapter_index: chapters.len(),
            });
        }
    }
    Ok(Toc { chapters, volumes })
}
/// Fetches a chapter's text starting at `chapter_url`.
///
/// After the content prelude, pages are collected with `fetch_pages`; the
/// content rule is evaluated on each page and the results are joined with a
/// single `\n`.
///
/// # Errors
/// Prelude, fetch, login-check and rule-evaluation errors are propagated.
pub async fn content(&self, chapter_url: &str) -> Result<String> {
    let rules = &self.source.content;
    let mut scope = self.base_vars();
    self.run_prelude(&rules.prelude, &mut scope).await?;
    let vars = self.flatten(&scope);
    let pages = self
        .fetch_pages(chapter_url, rules.next_page.as_ref(), rules.max_pages, &vars)
        .await?;
    // Stops at the first page whose rule evaluation fails.
    let parts = pages
        .iter()
        .map(|page| -> Result<String> { Ok(eval_value(&rules.value, page, &vars)?) })
        .collect::<Result<Vec<String>>>()?;
    Ok(parts.join("\n"))
}
/// Searches the source for `key`.
///
/// Seeds the chapter scope with `key`, `page` and `pageSize`, runs the search
/// prelude, sends the templated search request, evaluates `request.vars`
/// against the response (non-empty results are added to the chapter scope)
/// and finally extracts the result list.
///
/// # Errors
/// `BookSourceError::Missing("search")` when the source defines no search;
/// prelude, fetch, login-check and rule-evaluation errors are propagated.
pub async fn search(&self, key: &str, page: u32, page_size: u32) -> Result<Vec<BookListItem>> {
    let Some(op) = self.source.search.as_ref() else {
        return Err(BookSourceError::Missing("search"));
    };
    let mut scope = self.base_vars();
    let seeds = [
        ("key", key.to_string()),
        ("page", page.to_string()),
        ("pageSize", page_size.to_string()),
    ];
    for (name, value) in seeds {
        scope.insert(name.into(), value);
    }
    self.run_prelude(&op.prelude, &mut scope).await?;

    let request = &op.request;
    let request_vars = self.flatten(&scope);
    let html = self
        .send_templated(
            &request.url,
            request.method,
            request.body.as_ref(),
            &request.headers,
            &request_vars,
        )
        .await?;

    // All `request.vars` rules see the same snapshot taken after the
    // response arrived; they do not see each other's results.
    let snapshot = self.flatten(&scope);
    for (name, rule) in &request.vars {
        let captured = eval_value(rule, &html, &snapshot)?;
        if captured.is_empty() {
            continue;
        }
        scope.insert(name.clone(), captured);
    }

    let item_vars = self.flatten(&scope);
    self.eval_list_items(&op.list, &op.item, &html, &item_vars)
}
/// Lists the books of one explore category.
///
/// Seeds the chapter scope with `page` and `pageSize`, runs the explore
/// prelude, resolves `category_url` against the flattened variables, fetches
/// it and extracts the result list.
///
/// # Errors
/// `BookSourceError::Missing("explore")` when the source defines no explore
/// section; prelude, fetch, login-check and rule-evaluation errors are
/// propagated.
pub async fn explore(
    &self,
    category_url: &UrlOrRule,
    page: u32,
    page_size: u32,
) -> Result<Vec<BookListItem>> {
    let Some(op) = self.source.explore.as_ref() else {
        return Err(BookSourceError::Missing("explore"));
    };
    let mut scope = self.base_vars();
    scope.insert("page".into(), page.to_string());
    scope.insert("pageSize".into(), page_size.to_string());
    self.run_prelude(&op.prelude, &mut scope).await?;

    let vars = self.flatten(&scope);
    let listing_url = self.resolve_url(category_url, &vars)?;
    let html = self.fetch_checked(listing_url).await?;
    self.eval_list_items(&op.list, &op.item, &html, &vars)
}
/// Returns a copy of the source's explore categories, or an empty list when
/// the source has no explore section.
pub fn explore_categories(&self) -> Vec<Category> {
    match &self.source.explore {
        Some(explore) => explore.categories.clone(),
        None => Vec::new(),
    }
}
/// Merges the three variable layers into one fresh set.
///
/// Layers are applied in the order source vars, book vars, then `chapter`;
/// assuming `Vars` is a standard map, later layers overwrite earlier keys.
/// A layer whose lock is poisoned is skipped.
fn flatten(&self, chapter: &Vars) -> Vars {
    let mut merged = Vars::new();
    for layer in [&self.source_vars, &self.book_vars] {
        if let Ok(scope) = layer.read() {
            merged.extend(scope.iter().map(|(name, value)| (name.clone(), value.clone())));
        }
    }
    merged.extend(
        chapter
            .iter()
            .map(|(name, value)| (name.clone(), value.clone())),
    );
    merged
}
/// Executes the prelude `steps` in order, capturing variables as it goes.
///
/// A step is skipped when its `skip_if_present` list is non-empty and every
/// listed variable already has a non-empty value in the flattened scopes.
/// Otherwise the step's templated request is sent and its captures are
/// written via `capture_into` (into `chapter` or the shared book/source
/// scopes, depending on each capture's scope).
///
/// # Errors
/// Request and capture-evaluation errors abort the prelude and are
/// propagated.
async fn run_prelude(&self, steps: &[PreStep], chapter: &mut Vars) -> Result<()> {
    for step in steps {
        // One snapshot per step serves both the skip check and the request,
        // so both decisions are made against the same variable values.
        let flat = self.flatten(chapter);
        let already_satisfied = !step.skip_if_present.is_empty()
            && step
                .skip_if_present
                .iter()
                .all(|k| flat.get(k).is_some_and(|v| !v.is_empty()));
        if already_satisfied {
            continue;
        }
        let resp = self
            .send_templated(
                &step.url,
                step.method,
                step.body.as_ref(),
                &step.headers,
                &flat,
            )
            .await?;
        self.capture_into(&step.capture, &resp, chapter)?;
    }
    Ok(())
}
/// Resolves a templated request against `vars` and sends it.
///
/// The URL and optional body are resolved with `resolve_url`, each header
/// value is passed through `interpolate`, auth headers are applied on top,
/// and the request goes through `run_request`.
///
/// # Errors
/// Resolution, fetch and login-check errors are propagated.
async fn send_templated(
    &self,
    url: &UrlOrRule,
    method: Method,
    body: Option<&UrlOrRule>,
    headers: &HashMap<String, String>,
    vars: &Vars,
) -> Result<String> {
    let url = self.resolve_url(url, vars)?;
    let body = body
        .map(|template| self.resolve_url(template, vars))
        .transpose()?;
    let mut resolved_headers: HashMap<String, String> = headers
        .iter()
        .map(|(name, value)| (name.clone(), interpolate(value, vars)))
        .collect();
    self.apply_auth(&url, &mut resolved_headers);
    let request = FetchRequest {
        url,
        method,
        body,
        headers: resolved_headers,
    };
    self.run_request(request).await
}
/// Evaluates each capture against `body` and stores non-empty results in the
/// scope the capture names.
///
/// The scopes are re-flattened before every capture, so a capture can see
/// values written by the captures before it. Empty results are never
/// written. Writes to the book/source scopes are skipped if the
/// corresponding lock is poisoned.
///
/// # Errors
/// The first rule-evaluation error is propagated.
fn capture_into(&self, caps: &[Capture], body: &str, chapter: &mut Vars) -> Result<()> {
    for cap in caps {
        let value = eval_value(&cap.value, body, &self.flatten(chapter))?;
        if value.is_empty() {
            continue;
        }
        let name = cap.name.clone();
        // Chapter scope is the caller's local map; the other two are shared.
        let shared = match cap.scope {
            VarScope::Chapter => {
                chapter.insert(name, value);
                continue;
            }
            VarScope::Book => &self.book_vars,
            VarScope::Source => &self.source_vars,
        };
        if let Ok(mut scope) = shared.write() {
            scope.insert(name, value);
        }
    }
    Ok(())
}
/// Fetches `start` and follows "next page" links.
///
/// At most `max_pages` pages are fetched (a value of 0 is treated as 1).
/// After each fetch the optional `next_page` rule is evaluated on the page;
/// a blank result (or no rule) ends the walk.
///
/// # Errors
/// Fetch, login-check and rule-evaluation errors are propagated.
async fn fetch_pages(
    &self,
    start: &str,
    next_page: Option<&Rule>,
    max_pages: u32,
    vars: &Vars,
) -> Result<Vec<String>> {
    let budget = max_pages.max(1);
    let mut pages = Vec::new();
    let mut current = start.to_string();
    let mut fetched = 0u32;
    while fetched < budget {
        let html = self.fetch_checked(current.clone()).await?;
        fetched += 1;
        let next = if let Some(rule) = next_page {
            eval_value(rule, &html, vars)?
        } else {
            String::new()
        };
        pages.push(html);
        if next.trim().is_empty() {
            break;
        }
        current = next;
    }
    Ok(pages)
}
/// Splits `html` into list contexts with `list` and turns each context into
/// a [`BookListItem`] using the `item` rules.
///
/// # Errors
/// The first rule-evaluation error is propagated.
fn eval_list_items(
    &self,
    list: &Rule,
    item: &BookRules,
    html: &str,
    vars: &Vars,
) -> Result<Vec<BookListItem>> {
    eval_list(list, html)?
        .into_iter()
        .map(|ctx| -> Result<BookListItem> {
            let info = self.eval_book_info(item, &ctx, vars)?;
            let book_url = opt_eval(item.book_url.as_ref(), &ctx, vars)?;
            Ok(BookListItem { info, book_url })
        })
        .collect()
}
fn eval_book_info(&self, r: &BookRules, ctx: &str, vars: &Vars) -> Result<BookInfo> {
Ok(BookInfo {
name: opt_eval(r.name.as_ref(), ctx, vars)?,
author: opt_eval(r.author.as_ref(), ctx, vars)?,
cover: opt_eval(r.cover.as_ref(), ctx, vars)?,
intro: opt_eval(r.intro.as_ref(), ctx, vars)?,
kind: opt_eval(r.kind.as_ref(), ctx, vars)?,
last_chapter: opt_eval(r.last_chapter.as_ref(), ctx, vars)?,
toc_url: opt_eval(r.toc_url.as_ref(), ctx, vars)?,
word_count: opt_eval(r.word_count.as_ref(), ctx, vars)?,
})
}
/// Turns a [`UrlOrRule`] into a concrete string.
///
/// A plain string is treated as a `Rule::Template`; a rule is evaluated as
/// is. Both are evaluated against an empty context with `vars`.
///
/// # Errors
/// Rule-evaluation errors are propagated.
fn resolve_url(&self, u: &UrlOrRule, vars: &Vars) -> Result<String> {
    let resolved = match u {
        UrlOrRule::Rule(rule) => eval_value(rule, "", vars)?,
        UrlOrRule::Str(text) => {
            let template = Rule::Template {
                template: text.clone(),
            };
            eval_value(&template, "", vars)?
        }
    };
    Ok(resolved)
}
}
/// Evaluates an optional rule; an absent rule yields an empty string.
///
/// # Errors
/// Rule-evaluation errors are propagated.
fn opt_eval(rule: Option<&Rule>, ctx: &str, vars: &Vars) -> Result<String> {
    let Some(rule) = rule else {
        return Ok(String::new());
    };
    let value = eval_value(rule, ctx, vars)?;
    Ok(value)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::error::FetchError;
use crate::fetch::{FetchResponse, Fetcher};
use async_trait::async_trait;
use std::sync::Mutex;
/// Fetcher stub that answers every request with the same canned body.
struct MockFetcher(String);
#[async_trait]
impl Fetcher for MockFetcher {
    async fn fetch(&self, _req: FetchRequest) -> std::result::Result<String, FetchError> {
        let canned = &self.0;
        Ok(canned.clone())
    }
}
/// Fetcher stub that stores the headers of the most recent request in
/// `last_headers` and always replies with `body`.
struct RecordingFetcher {
    body: String,
    last_headers: Arc<Mutex<HashMap<String, String>>>,
}
#[async_trait]
impl Fetcher for RecordingFetcher {
    async fn fetch(&self, req: FetchRequest) -> std::result::Result<String, FetchError> {
        {
            let mut slot = self.last_headers.lock().unwrap();
            *slot = req.headers;
        }
        Ok(self.body.clone())
    }
}
/// Fetcher stub for cookie-jar tests: records the `Cookie` header of the most
/// recent request and always replies with `CATALOG`, status 200 and a
/// `set-cookie` response header holding `set_cookie`.
struct CookieEchoFetcher {
    set_cookie: String,
    last_cookie: Arc<Mutex<Option<String>>>,
}
#[async_trait]
impl Fetcher for CookieEchoFetcher {
    async fn fetch(&self, req: FetchRequest) -> std::result::Result<String, FetchError> {
        let response = self.fetch_full(req).await?;
        Ok(response.body)
    }
    async fn fetch_full(
        &self,
        req: FetchRequest,
    ) -> std::result::Result<FetchResponse, FetchError> {
        let sent_cookie = req.headers.get("Cookie").cloned();
        *self.last_cookie.lock().unwrap() = sent_cookie;
        let headers = HashMap::from([("set-cookie".to_string(), self.set_cookie.clone())]);
        Ok(FetchResponse {
            body: CATALOG.to_string(),
            status: 200,
            headers,
        })
    }
}
/// HTML fixture for the TOC tests: a `.box` holding two direct-child `<h2>`
/// headings, three chapter links, and one extra `<h2>` nested inside
/// `span#shuqian`.
const CATALOG: &str = r#"<html><body><div class="box">
<span id="shuqian"><h2 class="module-title type">阅读进度</h2></span>
<h2 class="module-title type">第一卷</h2>
<div class="module-row-info"><a class="module-row-text" href="/n/1.html"><div class="module-row-title"><span>第一章</span></div></a></div>
<div class="module-row-info"><a class="module-row-text" href="/n/2.html"><div class="module-row-title"><span>第二章</span></div></a></div>
<h2 class="module-title type">第二卷</h2>
<div class="module-row-info"><a class="module-row-text" href="/n/3.html"><div class="module-row-title"><span>第三章</span></div></a></div>
</div></body></html>"#;
/// Minimal book-source JSON shared by the tests: base URL `https://x`, an
/// empty `bookInfo`, TOC rules matching the `CATALOG` markup, and a content
/// rule. Several tests splice extra keys in front of `"bookInfo":{}`.
const SOURCE: &str = r#"{
"schema":"trnovel-booksource/v2","name":"t","url":"https://x",
"bookInfo":{},
"toc":{
"list":{"via":"css","select":".box > h2.module-title.type, .box a.module-row-text"},
"name":{"firstOf":[{"via":"css","select":".module-row-title","extract":"text"},{"via":"css","select":"h2","extract":"text"}]},
"url":{"via":"css","select":"a","extract":{"attr":"href"}},
"isVolume":{"via":"css","select":"h2","extract":"text"},
"maxPages":1
},
"content":{"value":{"via":"css","select":".article-content","extract":"text"}}
}"#;
/// Parsing `CATALOG` offline yields two volumes and three chapters, with the
/// second volume starting at chapter index 2.
#[tokio::test]
async fn engine_toc_splits_volumes_offline() {
    let source = BookSource::from_json(SOURCE).unwrap();
    let fetcher = Arc::new(MockFetcher(CATALOG.to_string()));
    let engine = Engine::with_fetcher(source, fetcher);

    let toc = engine.toc("/any").await.unwrap();

    assert_eq!(toc.volumes.len(), 2, "应识别 2 卷");
    assert_eq!(toc.chapters.len(), 3, "应识别 3 章");
    let first = &toc.chapters[0];
    assert_eq!(first.title, "第一章");
    assert_eq!(first.url, "/n/1.html");
    assert_eq!(toc.volumes[1].first_chapter_index, 2);
}
/// Login headers configured on the engine reach the fetcher for a request to
/// the source's own domain.
#[tokio::test]
async fn engine_merges_login_header_into_requests() {
    let source = BookSource::from_json(SOURCE).unwrap();
    let seen = Arc::new(Mutex::new(HashMap::new()));
    let fetcher = Arc::new(RecordingFetcher {
        body: CATALOG.to_string(),
        last_headers: Arc::clone(&seen),
    });
    let login: BTreeMap<String, String> = [("Authorization", "Bearer T"), ("Cookie", "sid=1")]
        .into_iter()
        .map(|(name, value)| (name.to_string(), value.to_string()))
        .collect();
    let engine = Engine::with_fetcher(source, fetcher).with_login_header(login);

    engine.toc("/any").await.unwrap();

    let headers = seen.lock().unwrap();
    let authorization = headers.get("Authorization").map(String::as_str);
    assert_eq!(authorization, Some("Bearer T"), "JWT 应每请求携带");
    let cookie = headers.get("Cookie").map(String::as_str);
    assert_eq!(cookie, Some("sid=1"), "Cookie 走同一注入路径");
}
/// A request to a different registrable domain must carry neither the login
/// `Authorization` header nor the login `Cookie`.
#[tokio::test]
async fn login_header_not_sent_to_other_registrable_domain() {
    let source = BookSource::from_json(SOURCE).unwrap();
    let seen = Arc::new(Mutex::new(HashMap::new()));
    let fetcher = Arc::new(RecordingFetcher {
        body: CATALOG.to_string(),
        last_headers: Arc::clone(&seen),
    });
    let login: BTreeMap<String, String> = [("Authorization", "Bearer T"), ("Cookie", "sid=1")]
        .into_iter()
        .map(|(name, value)| (name.to_string(), value.to_string()))
        .collect();
    let engine = Engine::with_fetcher(source, fetcher).with_login_header(login);

    engine.toc("https://evil.example.org/any").await.unwrap();

    // The binding is named `h` because the messages capture it inline.
    let h = seen.lock().unwrap();
    assert!(
        h.get("Authorization").is_none(),
        "跨注册域不应携带登录头: {h:?}"
    );
    assert!(
        h.get("Cookie").is_none(),
        "跨注册域不应携带登录 Cookie: {h:?}"
    );
}
/// A newline inside a login `Cookie` value is stripped before the request is
/// sent (`a=1\nb=2` becomes `a=1b=2`).
#[tokio::test]
async fn newline_in_login_header_sanitized_in_engine_requests() {
    let source = BookSource::from_json(SOURCE).unwrap();
    let seen = Arc::new(Mutex::new(HashMap::new()));
    let fetcher = Arc::new(RecordingFetcher {
        body: CATALOG.to_string(),
        last_headers: Arc::clone(&seen),
    });
    let login = BTreeMap::from([("Cookie".to_string(), "a=1\nb=2".to_string())]);
    let engine = Engine::with_fetcher(source, fetcher).with_login_header(login);

    engine.toc("/any").await.unwrap();

    // The binding is named `cookie` because the message captures it inline.
    let cookie = seen
        .lock()
        .unwrap()
        .get("Cookie")
        .cloned()
        .unwrap_or_default();
    assert!(!cookie.contains('\n'), "Cookie 的 \\n 应被剥除: {cookie:?}");
    assert_eq!(cookie, "a=1b=2", "与 host 侧 sanitize 行为对称");
}
/// Without login headers (and with the cookie jar disabled) the engine adds
/// no headers to the request.
#[tokio::test]
async fn engine_without_login_header_adds_nothing() {
    let source = BookSource::from_json(SOURCE).unwrap();
    let seen = Arc::new(Mutex::new(HashMap::new()));
    let fetcher = Arc::new(RecordingFetcher {
        body: CATALOG.to_string(),
        last_headers: Arc::clone(&seen),
    });
    let engine = Engine::with_fetcher(source, fetcher);

    engine.toc("/any").await.unwrap();

    let sent_nothing = seen.lock().unwrap().is_empty();
    assert!(sent_nothing, "未登录不应注入额外头");
}
/// With a `loginCheckJs` script configured, a response containing the
/// "not logged in" marker is reported as login-expired, while a normal
/// response passes.
#[cfg(feature = "js")]
#[tokio::test]
async fn login_check_js_detects_expired() {
    let json = SOURCE.replacen(
        "\"bookInfo\":{}",
        "\"loginCheckJs\":\"result.indexOf('未登录')<0\",\"bookInfo\":{}",
        1,
    );
    let source = BookSource::from_json(&json).unwrap();

    let expired_page = Arc::new(MockFetcher("<html>未登录</html>".into()));
    let bad = Engine::with_fetcher(source.clone(), expired_page);
    // The binding is named `err` because the message captures it inline.
    let err = bad.toc("/any").await.unwrap_err();
    assert!(err.is_login_expired(), "应判登录失效: {err}");

    let normal_page = Arc::new(MockFetcher(CATALOG.to_string()));
    let ok = Engine::with_fetcher(source, normal_page);
    let outcome = ok.toc("/any").await;
    assert!(outcome.is_ok(), "正常响应不应判失效");
}
/// With `enabledCookieJar` on: the first request carries no cookie, the
/// `set-cookie` it receives is sent on the next request, and the cookie shows
/// up in `persistent_cookies` under the key `x`.
#[tokio::test]
async fn enabled_cookie_jar_absorbs_resends_and_persists() {
    let json = SOURCE.replacen(
        "\"bookInfo\":{}",
        "\"enabledCookieJar\":true,\"bookInfo\":{}",
        1,
    );
    let source = BookSource::from_json(&json).unwrap();
    let last_sent = Arc::new(Mutex::new(None));
    let fetcher = Arc::new(CookieEchoFetcher {
        set_cookie: "token=xyz; Max-Age=3600; Path=/".to_string(),
        last_cookie: Arc::clone(&last_sent),
    });
    let engine = Engine::with_fetcher(source, fetcher);

    engine.toc("/p1").await.unwrap();
    assert!(last_sent.lock().unwrap().is_none(), "首请求不应带 cookie");

    engine.book_info("/p2").await.unwrap();
    let resent = last_sent.lock().unwrap().clone();
    assert_eq!(
        resent,
        Some("token=xyz".to_string()),
        "回灌 cookie 应随后续请求发出"
    );

    let persisted = engine.persistent_cookies();
    assert_eq!(persisted.get("x").map(String::as_str), Some("token=xyz"));
}
/// A `set-cookie` received during `warmup` ends up in the engine's cookie jar
/// when `enabledCookieJar` is on.
#[tokio::test]
async fn warmup_absorbs_set_cookie_into_jar() {
    let json = SOURCE.replacen(
        "\"bookInfo\":{}",
        "\"enabledCookieJar\":true,\"http\":{\"warmup\":[\"https://x/warm\"]},\"bookInfo\":{}",
        1,
    );
    let source = BookSource::from_json(&json).unwrap();
    let last_sent = Arc::new(Mutex::new(None));
    let fetcher = Arc::new(CookieEchoFetcher {
        set_cookie: "token=warm; Max-Age=3600; Path=/".to_string(),
        last_cookie: Arc::clone(&last_sent),
    });
    let engine = Engine::with_fetcher(source, fetcher);

    engine.warmup().await;

    let persisted = engine.persistent_cookies();
    assert_eq!(
        persisted.get("x").map(String::as_str),
        Some("token=warm"),
        "预热页的 Set-Cookie 应回灌引擎 cookie 库"
    );
}
/// With the cookie jar left disabled, a received `set-cookie` is neither
/// re-sent on later requests nor persisted.
#[tokio::test]
async fn cookie_jar_disabled_does_not_absorb() {
    let source = BookSource::from_json(SOURCE).unwrap();
    let last_sent = Arc::new(Mutex::new(None));
    let fetcher = Arc::new(CookieEchoFetcher {
        set_cookie: "token=xyz; Max-Age=3600".to_string(),
        last_cookie: Arc::clone(&last_sent),
    });
    let engine = Engine::with_fetcher(source, fetcher);

    engine.toc("/p1").await.unwrap();
    engine.book_info("/p2").await.unwrap();

    let nothing_resent = last_sent.lock().unwrap().is_none();
    assert!(nothing_resent, "未开 cookieJar 不应回灌/再发");
    assert!(engine.persistent_cookies().is_empty());
}
/// Fetcher stub driven by substring routes: every requested URL is appended
/// to `calls`, and the body of the first route whose pattern occurs in the
/// URL is returned (an empty body when no route matches).
struct ScriptedFetcher {
    routes: Vec<(String, String)>,
    calls: Arc<Mutex<Vec<String>>>,
}
#[async_trait]
impl Fetcher for ScriptedFetcher {
    async fn fetch(&self, req: FetchRequest) -> std::result::Result<String, FetchError> {
        self.calls.lock().unwrap().push(req.url.clone());
        let body = self
            .routes
            .iter()
            .find(|(pattern, _)| req.url.contains(pattern.as_str()))
            .map(|(_, body)| body.clone())
            .unwrap_or_default();
        Ok(body)
    }
}
/// Builds a [`ScriptedFetcher`] from `(url substring, body)` pairs and
/// returns it together with a handle to its call log.
fn scripted(routes: Vec<(&str, &str)>) -> (Arc<ScriptedFetcher>, Arc<Mutex<Vec<String>>>) {
    let calls = Arc::new(Mutex::new(Vec::new()));
    let mut owned_routes = Vec::with_capacity(routes.len());
    for (pattern, body) in routes {
        owned_routes.push((pattern.to_string(), body.to_string()));
    }
    let fetcher = Arc::new(ScriptedFetcher {
        routes: owned_routes,
        calls: Arc::clone(&calls),
    });
    (fetcher, calls)
}
/// A chapter-scoped value captured by the search prelude (`token`, the
/// trimmed raw body of `/prepare`) is substituted into the main search URL.
#[tokio::test]
async fn prelude_captures_token_into_main_request() {
let json = r#"{
"schema":"trnovel-booksource/v2","name":"t","url":"https://x",
"search":{
"prelude":[{"url":{"template":"{{base}}/prepare"},
"capture":[{"name":"token","value":{"via":"raw","clean":[{"trim":true}]},"scope":"chapter"}]}],
"request":{"url":{"template":"{{base}}/search?kw={{key}}&token={{token}}"}},
"list":{"via":"css","select":".item"},
"item":{"name":{"via":"css","select":".t","extract":"text"}}
},
"bookInfo":{},
"toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
"content":{"value":{"via":"css","select":".c"}}
}"#;
let src = BookSource::from_json(json).unwrap();
// Route bodies: `/prepare` yields the token, `/search` yields one result.
let (f, calls) = scripted(vec![
("/prepare", "ABC"),
(
"/search",
r#"<div class="item"><span class="t">书名</span></div>"#,
),
]);
let engine = Engine::with_fetcher(src, f);
let items = engine.search("k", 1, 20).await.unwrap();
assert_eq!(items.len(), 1);
assert_eq!(items[0].info.name, "书名");
let c = calls.lock().unwrap();
assert!(
c.iter().any(|u| u.contains("/prepare")),
"应先跑前置 prepare: {c:?}"
);
assert!(
c.iter().any(|u| u.contains("token=ABC")),
"主搜索应带捕获的 token: {c:?}"
);
}
/// A source-scoped capture combined with `skipIfPresent` makes the prelude
/// run only once across two searches on the same engine, and the captured
/// value is visible through `source_vars()`.
#[tokio::test]
async fn skip_if_present_reuses_source_scope_token() {
let json = r#"{
"schema":"trnovel-booksource/v2","name":"t","url":"https://x",
"search":{
"prelude":[{"url":{"template":"{{base}}/prepare"},
"capture":[{"name":"token","value":{"via":"raw","clean":[{"trim":true}]},"scope":"source"}],
"skipIfPresent":["token"]}],
"request":{"url":{"template":"{{base}}/search?token={{token}}"}},
"list":{"via":"css","select":".item"},
"item":{"name":{"via":"css","select":".t","extract":"text"}}
},
"bookInfo":{},
"toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
"content":{"value":{"via":"css","select":".c"}}
}"#;
let src = BookSource::from_json(json).unwrap();
let (f, calls) = scripted(vec![
("/prepare", "TKN"),
(
"/search",
r#"<div class="item"><span class="t">x</span></div>"#,
),
]);
let engine = Engine::with_fetcher(src, f);
// Second search should find `token` already present and skip `/prepare`.
engine.search("a", 1, 20).await.unwrap();
engine.search("b", 1, 20).await.unwrap();
let prepares = calls
.lock()
.unwrap()
.iter()
.filter(|u| u.contains("/prepare"))
.count();
assert_eq!(
prepares, 1,
"skipIfPresent 应使 source 级 token 复用,prepare 只跑一次"
);
assert_eq!(
engine.source_vars().get("token").map(String::as_str),
Some("TKN")
);
}
/// A variable extracted by `request.vars` from the search response (`site`)
/// is available to the item rules' templates.
#[tokio::test]
async fn request_vars_visible_to_list_items() {
let json = r#"{
"schema":"trnovel-booksource/v2","name":"t","url":"https://x",
"search":{
"request":{"url":{"template":"{{base}}/s"},
"vars":{"site":{"via":"css","select":".site","extract":"text"}}},
"list":{"via":"css","select":".item"},
"item":{"name":{"template":"{{site}}-书"}}
},
"bookInfo":{},
"toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
"content":{"value":{"via":"css","select":".c"}}
}"#;
let src = BookSource::from_json(json).unwrap();
let html = r#"<span class="site">甲站</span><div class="item">x</div>"#;
let engine = Engine::with_fetcher(src, Arc::new(MockFetcher(html.to_string())));
let items = engine.search("k", 1, 20).await.unwrap();
assert_eq!(items.len(), 1);
assert_eq!(
items[0].info.name, "甲站-书",
"item 模板应看到主请求捕获的 site"
);
}
/// A capture that evaluates to an empty string is not stored in its scope,
/// and the main request is still sent (with the variable rendered empty).
#[tokio::test]
async fn empty_capture_not_written() {
let json = r#"{
"schema":"trnovel-booksource/v2","name":"t","url":"https://x",
"search":{
"prelude":[{"url":{"template":"{{base}}/p"},
"capture":[{"name":"x","value":{"via":"css","select":".nope","extract":"text"},"scope":"source"}]}],
"request":{"url":{"template":"{{base}}/s?x={{x}}"}},
"list":{"via":"css","select":".item"},
"item":{"name":{"via":"css","select":".t","extract":"text"}}
},
"bookInfo":{},
"toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
"content":{"value":{"via":"css","select":".c"}}
}"#;
let src = BookSource::from_json(json).unwrap();
// The `/p` body has no `.nope` element, so the capture comes back empty.
let (f, calls) = scripted(vec![
("/p", "<html></html>"),
("/s", r#"<div class="item"><span class="t">y</span></div>"#),
]);
let engine = Engine::with_fetcher(src, f);
engine.search("k", 1, 20).await.unwrap();
assert!(
!engine.source_vars().contains_key("x"),
"空串捕获不应写作用域层"
);
assert!(
calls.lock().unwrap().iter().any(|u| u.contains("/s?x=")),
"主请求应照常发出(x 为空串)"
);
}
/// A value captured by the TOC prelude (`csrf`) is available to the TOC
/// extraction rules, here concatenated into each chapter URL.
#[tokio::test]
async fn toc_prelude_csrf_visible_to_extraction() {
let json = r#"{
"schema":"trnovel-booksource/v2","name":"t","url":"https://x",
"bookInfo":{},
"toc":{
"prelude":[{"url":{"template":"{{base}}/prepare"},
"capture":[{"name":"csrf","value":{"via":"raw","clean":[{"trim":true}]},"scope":"chapter"}]}],
"list":{"via":"css","select":".ch"},
"name":{"via":"css","select":"a","extract":"text"},
"url":{"concat":[{"literal":"/c?sign="},{"template":"{{csrf}}"},{"literal":"&href="},{"via":"css","select":"a","extract":{"attr":"href"}}]},
"maxPages":1
},
"content":{"value":{"via":"css","select":".c"}}
}"#;
let src = BookSource::from_json(json).unwrap();
let (f, _calls) = scripted(vec![
("/prepare", "SIG"),
(
"/toc",
r#"<div class="ch"><a href="/n/1.html">第一章</a></div>"#,
),
]);
let engine = Engine::with_fetcher(src, f);
let toc = engine.toc("/toc/1").await.unwrap();
assert_eq!(toc.chapters.len(), 1);
assert_eq!(
toc.chapters[0].url, "/c?sign=SIG&href=/n/1.html",
"目录 url 应拼入前置捕获的 csrf"
);
}
/// Header templates of the main search request are interpolated with
/// variables captured by the prelude (`Bearer {{token}}` becomes
/// `Bearer ABC`).
#[tokio::test]
async fn main_request_headers_interpolate_captured_vars() {
    /// Records the `Authorization` header seen on `/search` requests; any
    /// other URL gets the token body `ABC`.
    struct HeaderProbe {
        seen: Arc<Mutex<Option<String>>>,
    }
    #[async_trait]
    impl Fetcher for HeaderProbe {
        async fn fetch(&self, req: FetchRequest) -> std::result::Result<String, FetchError> {
            if !req.url.contains("/search") {
                return Ok("ABC".to_string());
            }
            *self.seen.lock().unwrap() = req.headers.get("Authorization").cloned();
            Ok(r#"<div class="item"><span class="t">书</span></div>"#.to_string())
        }
    }

    let json = r#"{
"schema":"trnovel-booksource/v2","name":"t","url":"https://x",
"search":{
"prelude":[{"url":{"template":"{{base}}/prepare"},
"capture":[{"name":"token","value":{"via":"raw","clean":[{"trim":true}]},"scope":"chapter"}]}],
"request":{"url":{"template":"{{base}}/search"},
"headers":{"Authorization":"Bearer {{token}}"}},
"list":{"via":"css","select":".item"},
"item":{"name":{"via":"css","select":".t","extract":"text"}}
},
"bookInfo":{},
"toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
"content":{"value":{"via":"css","select":".c"}}
}"#;
    let source = BookSource::from_json(json).unwrap();
    let seen = Arc::new(Mutex::new(None));
    let probe = Arc::new(HeaderProbe {
        seen: Arc::clone(&seen),
    });
    let engine = Engine::with_fetcher(source, probe);

    engine.search("k", 1, 20).await.unwrap();

    let authorization = seen.lock().unwrap().clone();
    assert_eq!(
        authorization,
        Some("Bearer ABC".to_string()),
        "主请求 header 应插值前置捕获的 token"
    );
}
/// When `request.vars` declares several variables, all of them are captured
/// from the response and are visible to the item template.
#[tokio::test]
async fn multiple_request_vars_all_captured() {
let json = r#"{
"schema":"trnovel-booksource/v2","name":"t","url":"https://x",
"search":{
"request":{"url":{"template":"{{base}}/s"},
"vars":{
"a":{"via":"css","select":".a","extract":"text"},
"b":{"via":"css","select":".b","extract":"text"}
}},
"list":{"via":"css","select":".item"},
"item":{"name":{"template":"{{a}}-{{b}}"}}
},
"bookInfo":{},
"toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
"content":{"value":{"via":"css","select":".c"}}
}"#;
let src = BookSource::from_json(json).unwrap();
let html = r#"<span class="a">甲</span><span class="b">乙</span><div class="item">x</div>"#;
let engine = Engine::with_fetcher(src, Arc::new(MockFetcher(html.to_string())));
let items = engine.search("k", 1, 20).await.unwrap();
assert_eq!(
items[0].info.name, "甲-乙",
"多条 request.vars 应都被捕获且对 item 可见"
);
}
}