1use super::error::{BookSourceError, Result};
5use super::eval::{Vars, eval_list, eval_value};
6use super::fetch::{FetchRequest, Fetcher, ReqwestFetcher};
7use super::model::{BookInfo, BookListItem, Chapter, Toc, Volume};
8use super::source::{BookRules, BookSource, Category, Rule, UrlOrRule};
9use std::sync::Arc;
10
/// Evaluates the rules of one [`BookSource`] against pages obtained through a
/// [`Fetcher`].
///
/// Both fields are stored behind `Arc`, so the derived `Clone` only copies the
/// two reference-counted handles.
#[derive(Clone)]
pub struct Engine {
    /// Book source definition whose rules and settings this engine reads.
    source: Arc<BookSource>,
    /// Fetcher through which every page request of this engine is issued.
    fetcher: Arc<dyn Fetcher>,
}
17
18impl std::fmt::Debug for Engine {
19 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
20 f.debug_struct("Engine")
21 .field("source", &self.source.name)
22 .finish_non_exhaustive()
23 }
24}
25
26impl Engine {
27 pub fn new(source: BookSource) -> Result<Self> {
29 let fetcher = Arc::new(ReqwestFetcher::new(&source)?);
30 Ok(Self {
31 source: Arc::new(source),
32 fetcher,
33 })
34 }
35
36 pub fn with_fetcher(source: BookSource, fetcher: Arc<dyn Fetcher>) -> Self {
38 Self {
39 source: Arc::new(source),
40 fetcher,
41 }
42 }
43
44 #[cfg(feature = "browser")]
48 pub fn with_browser_assist(
49 source: BookSource,
50 browser: Option<crate::browser::BrowserFetcher>,
51 ) -> Result<Self> {
52 let fetcher = crate::browser::EscalatingFetcher::new(&source, browser)?;
53 Ok(Self {
54 source: Arc::new(source),
55 fetcher: Arc::new(fetcher),
56 })
57 }
58
59 pub fn source(&self) -> &BookSource {
61 &self.source
62 }
63
64 fn base_vars(&self) -> Vars {
65 let mut v = Vars::new();
66 v.insert(
67 "base".into(),
68 self.source.url.trim_end_matches('/').to_string(),
69 );
70 v
71 }
72
73 pub async fn warmup(&self) {
75 for u in &self.source.http.warmup {
76 let _ = self.fetcher.fetch(FetchRequest::get(u.clone())).await;
77 }
78 }
79
80 pub async fn book_info(&self, book_url: &str) -> Result<BookInfo> {
82 let html = self.fetcher.fetch(FetchRequest::get(book_url)).await?;
83 let vars = self.base_vars();
84 self.eval_book_info(&self.source.book_info, &html, &vars)
85 }
86
87 pub async fn toc(&self, toc_url: &str) -> Result<Toc> {
89 let toc = &self.source.toc;
90 let vars = self.base_vars();
91 let pages = self
92 .fetch_pages(toc_url, toc.next_page.as_ref(), toc.max_pages, &vars)
93 .await?;
94
95 let mut chapters: Vec<Chapter> = Vec::new();
96 let mut volumes: Vec<Volume> = Vec::new();
97 for page in &pages {
98 for item in eval_list(&toc.list, page)? {
99 let title = eval_value(&toc.name, &item, &vars)?;
100 let is_volume = match &toc.is_volume {
101 Some(r) => !eval_value(r, &item, &vars)?.trim().is_empty(),
102 None => false,
103 };
104 if is_volume {
105 volumes.push(Volume {
106 title,
107 first_chapter_index: chapters.len(),
108 });
109 } else {
110 let url = eval_value(&toc.url, &item, &vars)?;
111 chapters.push(Chapter {
112 title,
113 url,
114 is_volume: false,
115 });
116 }
117 }
118 }
119 Ok(Toc { chapters, volumes })
120 }
121
122 pub async fn content(&self, chapter_url: &str) -> Result<String> {
124 let c = &self.source.content;
125 let vars = self.base_vars();
126 let pages = self
127 .fetch_pages(chapter_url, c.next_page.as_ref(), c.max_pages, &vars)
128 .await?;
129 let mut parts = Vec::with_capacity(pages.len());
130 for page in &pages {
131 parts.push(eval_value(&c.value, page, &vars)?);
132 }
133 Ok(parts.join("\n"))
134 }
135
136 pub async fn search(&self, key: &str, page: u32, page_size: u32) -> Result<Vec<BookListItem>> {
138 let op = self
139 .source
140 .search
141 .as_ref()
142 .ok_or(BookSourceError::Missing("search"))?;
143 let mut vars = self.base_vars();
144 vars.insert("key".into(), key.to_string());
145 vars.insert("page".into(), page.to_string());
146 vars.insert("pageSize".into(), page_size.to_string());
147
148 let url = self.resolve_url(&op.request.url, &vars)?;
149 let body = match &op.request.body {
150 Some(b) => Some(self.resolve_url(b, &vars)?),
151 None => None,
152 };
153 let html = self
154 .fetcher
155 .fetch(FetchRequest {
156 url,
157 method: op.request.method,
158 body,
159 headers: op.request.headers.clone(),
160 })
161 .await?;
162 self.eval_list_items(&op.list, &op.item, &html)
163 }
164
165 pub async fn explore(
167 &self,
168 category_url: &UrlOrRule,
169 page: u32,
170 page_size: u32,
171 ) -> Result<Vec<BookListItem>> {
172 let op = self
173 .source
174 .explore
175 .as_ref()
176 .ok_or(BookSourceError::Missing("explore"))?;
177 let mut vars = self.base_vars();
178 vars.insert("page".into(), page.to_string());
179 vars.insert("pageSize".into(), page_size.to_string());
180 let url = self.resolve_url(category_url, &vars)?;
181 let html = self.fetcher.fetch(FetchRequest::get(url)).await?;
182 self.eval_list_items(&op.list, &op.item, &html)
183 }
184
185 pub fn explore_categories(&self) -> Vec<Category> {
187 self.source
188 .explore
189 .as_ref()
190 .map(|e| e.categories.clone())
191 .unwrap_or_default()
192 }
193
194 async fn fetch_pages(
198 &self,
199 start: &str,
200 next_page: Option<&Rule>,
201 max_pages: u32,
202 vars: &Vars,
203 ) -> Result<Vec<String>> {
204 let mut pages = Vec::new();
205 let mut url = start.to_string();
206 for _ in 0..max_pages.max(1) {
207 let html = self.fetcher.fetch(FetchRequest::get(url.clone())).await?;
208 let next = match next_page {
209 Some(r) => eval_value(r, &html, vars)?,
210 None => String::new(),
211 };
212 pages.push(html);
213 if next.trim().is_empty() {
214 break;
215 }
216 url = next;
217 }
218 Ok(pages)
219 }
220
221 fn eval_list_items(
222 &self,
223 list: &Rule,
224 item: &BookRules,
225 html: &str,
226 ) -> Result<Vec<BookListItem>> {
227 let vars = self.base_vars();
228 let mut out = Vec::new();
229 for ctx in eval_list(list, html)? {
230 let info = self.eval_book_info(item, &ctx, &vars)?;
231 let book_url = opt_eval(item.book_url.as_ref(), &ctx, &vars)?;
232 out.push(BookListItem { info, book_url });
233 }
234 Ok(out)
235 }
236
237 fn eval_book_info(&self, r: &BookRules, ctx: &str, vars: &Vars) -> Result<BookInfo> {
238 Ok(BookInfo {
239 name: opt_eval(r.name.as_ref(), ctx, vars)?,
240 author: opt_eval(r.author.as_ref(), ctx, vars)?,
241 cover: opt_eval(r.cover.as_ref(), ctx, vars)?,
242 intro: opt_eval(r.intro.as_ref(), ctx, vars)?,
243 kind: opt_eval(r.kind.as_ref(), ctx, vars)?,
244 last_chapter: opt_eval(r.last_chapter.as_ref(), ctx, vars)?,
245 toc_url: opt_eval(r.toc_url.as_ref(), ctx, vars)?,
246 word_count: opt_eval(r.word_count.as_ref(), ctx, vars)?,
247 })
248 }
249
250 fn resolve_url(&self, u: &UrlOrRule, vars: &Vars) -> Result<String> {
251 Ok(match u {
252 UrlOrRule::Str(s) => eval_value(
254 &Rule::Template {
255 template: s.clone(),
256 },
257 "",
258 vars,
259 )?,
260 UrlOrRule::Rule(r) => eval_value(r, "", vars)?,
261 })
262 }
263}
264
265fn opt_eval(rule: Option<&Rule>, ctx: &str, vars: &Vars) -> Result<String> {
267 Ok(match rule {
268 Some(r) => eval_value(r, ctx, vars)?,
269 None => String::new(),
270 })
271}
272
// Offline tests: the engine is driven by a stub fetcher, so no network is used.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::FetchError;
    use crate::fetch::Fetcher;
    use async_trait::async_trait;

    /// Stub fetcher that ignores the request and always returns the wrapped string.
    struct MockFetcher(String);

    #[async_trait]
    impl Fetcher for MockFetcher {
        async fn fetch(&self, _req: FetchRequest) -> std::result::Result<String, FetchError> {
            Ok(self.0.clone())
        }
    }

    /// Catalog page fixture: two volume headings (`h2` directly under `.box`)
    /// interleaved with three chapter links. The extra `h2` wrapped in
    /// `#shuqian` is not a direct child of `.box`, so the `.box > h2...` list
    /// selector in `SOURCE` is expected to skip it.
    const CATALOG: &str = r#"<html><body><div class="box">
 <span id="shuqian"><h2 class="module-title type">阅读进度</h2></span>
 <h2 class="module-title type">第一卷</h2>
 <div class="module-row-info"><a class="module-row-text" href="/n/1.html"><div class="module-row-title"><span>第一章</span></div></a></div>
 <div class="module-row-info"><a class="module-row-text" href="/n/2.html"><div class="module-row-title"><span>第二章</span></div></a></div>
 <h2 class="module-title type">第二卷</h2>
 <div class="module-row-info"><a class="module-row-text" href="/n/3.html"><div class="module-row-title"><span>第三章</span></div></a></div>
 </div></body></html>"#;

    /// Minimal book source: the TOC list rule selects both volume headings and
    /// chapter links, and `isVolume` extracts `h2` text so that only headings
    /// are classified as volumes.
    const SOURCE: &str = r#"{
 "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
 "bookInfo":{},
 "toc":{
 "list":{"via":"css","select":".box > h2.module-title.type, .box a.module-row-text"},
 "name":{"firstOf":[{"via":"css","select":".module-row-title","extract":"text"},{"via":"css","select":"h2","extract":"text"}]},
 "url":{"via":"css","select":"a","extract":{"attr":"href"}},
 "isVolume":{"via":"css","select":"h2","extract":"text"},
 "maxPages":1
 },
 "content":{"value":{"via":"css","select":".article-content","extract":"text"}}
 }"#;

    /// `Engine::toc` must separate volume headings from chapters and record,
    /// for each volume, the index of the first chapter that follows it.
    #[tokio::test]
    async fn engine_toc_splits_volumes_offline() {
        let src = BookSource::from_json(SOURCE).unwrap();
        let engine = Engine::with_fetcher(src, Arc::new(MockFetcher(CATALOG.to_string())));
        // The URL is irrelevant: the mock returns CATALOG for every request.
        let toc = engine.toc("/any").await.unwrap();
        assert_eq!(toc.volumes.len(), 2, "应识别 2 卷");
        assert_eq!(toc.chapters.len(), 3, "应识别 3 章");
        assert_eq!(toc.chapters[0].title, "第一章");
        assert_eq!(toc.chapters[0].url, "/n/1.html");
        // The second volume starts after the two chapters of the first volume.
        assert_eq!(toc.volumes[1].first_chapter_index, 2);
    }
}