1use super::error::ConfigError;
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, HashMap};
9
// Selector kind carried by the `via` field of a `LeafRule`.
//
// Wire format: the lowercase variant name ("css", "xpath", "json", "regex", "raw").
// `Css` is the `Default`.
// NOTE(review): how each kind interprets the rule's `select` string is decided by the
// evaluator, which is not part of this file.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Via {
    #[default]
    Css,
    Xpath,
    Json,
    Regex,
    Raw,
}
25
// What a leaf rule extracts from a matched node.
//
// Untagged on the wire, so the JSON shape picks the variant:
//   * a bare string such as "text" or "html"  -> `Op`
//   * an object such as { "attr": "href" }      -> `Attr`
// Variants are tried in declaration order.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(untagged)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Extract {
    // One of the fixed extraction operations.
    Op(ExtractOp),
    // The value of the named attribute.
    Attr { attr: String },
}
34
35impl Default for Extract {
36 fn default() -> Self {
37 Extract::Op(ExtractOp::Text)
38 }
39}
40
// Fixed extraction operations usable as `Extract::Op`.
//
// Wire format: camelCase variant name ("text", "ownText", "html", "innerHtml",
// "outerHtml"). `Text` is the `Default`.
// NOTE(review): the exact difference between `Html`, `InnerHtml` and `OuterHtml`
// is defined by the evaluator, not here.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum ExtractOp {
    #[default]
    Text,
    OwnText,
    Html,
    InnerHtml,
    OuterHtml,
}
53
// Text codec named by the `decode` / `encode` fields of a `CleanStep`.
//
// Wire format: camelCase variant name ("base64", "base64url", "hex", "url").
// There is no default; the fields that use it are `Option<Codec>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Codec {
    Base64,
    Base64url,
    Hex,
    Url,
}
65
// Encoding tag attached to string fields that stand for bytes (keys, IVs,
// cipher input/output) in `HashStep` and `CipherStep`.
//
// Wire format: lowercase variant name ("utf8", "base64", "hex", "raw").
// `Utf8` is the `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum ByteEnc {
    #[default]
    Utf8,
    Base64,
    Hex,
    Raw,
}
78
// Digest algorithm selected by `HashStep::algo`.
//
// Wire format: lowercase variant name ("md5", "sha1", "sha256", "sha512").
// No default: `algo` is a required field.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum HashAlgo {
    Md5,
    Sha1,
    Sha256,
    Sha512,
}
89
// Output representation selected by `HashStep::output`.
//
// Wire format: "hex" or "base64". `Hex` is the `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum HashOut {
    #[default]
    Hex,
    Base64,
}
99
// Hash configuration used by `CleanStep::hash`.
//
// JSON keys are camelCase (`algo`, `output`, `hmacKey`, `hmacKeyEnc`); any other
// key is rejected by `deny_unknown_fields`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct HashStep {
    // Required digest algorithm.
    pub algo: HashAlgo,
    // Output representation; `HashOut::Hex` when omitted.
    #[serde(default)]
    pub output: HashOut,
    // Optional key; omitted from the serialized form when `None`.
    // NOTE(review): presumably switches the step to an HMAC — confirm in the evaluator.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub hmac_key: Option<String>,
    // Encoding tag for `hmac_key`; `ByteEnc::Utf8` when omitted. Always serialized.
    #[serde(default)]
    pub hmac_key_enc: ByteEnc,
}
114
// Cipher algorithm selected by `CipherStep::algo`.
//
// Wire format: camelCase variant name ("aes", "des", "tripleDes"). No default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum CipherAlgo {
    Aes,
    Des,
    TripleDes,
}
124
// Block-cipher mode selected by `CipherStep::mode`.
//
// Wire format: lowercase variant name ("cbc", "ecb", "cfb", "gcm"). No default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum CipherMode {
    Cbc,
    Ecb,
    Cfb,
    Gcm,
}
135
// Padding scheme selected by `CipherStep::padding`.
//
// Wire format: lowercase variant name ("pkcs7", "zero", "none").
// `Pkcs7` is the `Default`.
// Note: the `None` variant is `Padding::None`, unrelated to `Option::None`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Padding {
    #[default]
    Pkcs7,
    Zero,
    None,
}
146
// Direction of a `CipherStep`.
//
// Wire format: "decrypt" or "encrypt". `Decrypt` is the `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum CipherOp {
    #[default]
    Decrypt,
    Encrypt,
}
156
// Symmetric-cipher configuration used by `CleanStep::cipher`.
//
// JSON keys are camelCase (`keyEnc`, `ivEnc`, `inputEnc`, `outputEnc`, ...);
// unknown keys are rejected. `algo`, `mode` and `key` are required.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct CipherStep {
    // Required cipher algorithm.
    pub algo: CipherAlgo,
    // Required cipher mode.
    pub mode: CipherMode,
    // Padding scheme; `Padding::Pkcs7` when omitted.
    #[serde(default)]
    pub padding: Padding,
    // Direction; `CipherOp::Decrypt` when omitted.
    #[serde(default)]
    pub op: CipherOp,
    // Required key material, given as text and tagged by `key_enc`.
    pub key: String,
    // Encoding tag for `key`; `ByteEnc::Utf8` when omitted.
    #[serde(default)]
    pub key_enc: ByteEnc,
    // Optional initialization vector, tagged by `iv_enc`; not serialized when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub iv: Option<String>,
    // Encoding tag for `iv`; `ByteEnc::Utf8` when omitted.
    #[serde(default)]
    pub iv_enc: ByteEnc,
    // Optional encoding tag for the step's input; not serialized when `None`.
    // NOTE(review): the fallback applied for `None` lives in the evaluator — confirm there.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_enc: Option<ByteEnc>,
    // Optional encoding tag for the step's output; same `None` caveat as `input_enc`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_enc: Option<ByteEnc>,
}
182
// Conversion selected by `CleanStep::cn`.
//
// Wire format: lowercase variant name ("t2s", "s2t"). No default.
// NOTE(review): presumably Traditional<->Simplified Chinese conversion — confirm
// against the evaluator.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum CnConvert {
    T2s,
    S2t,
}
193
194#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
197#[serde(deny_unknown_fields)]
198#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
199pub struct CleanStep {
200 #[serde(default, skip_serializing_if = "Option::is_none")]
201 pub regex: Option<String>,
202 #[serde(default, skip_serializing_if = "Option::is_none")]
203 pub replace: Option<String>,
204 #[serde(default, skip_serializing_if = "Option::is_none")]
205 pub trim: Option<bool>,
206 #[serde(default, skip_serializing_if = "Option::is_none")]
207 pub prepend: Option<String>,
208 #[serde(default, skip_serializing_if = "Option::is_none")]
209 pub append: Option<String>,
210 #[serde(default, skip_serializing_if = "Option::is_none")]
212 pub decode: Option<Codec>,
213 #[serde(default, skip_serializing_if = "Option::is_none")]
215 pub encode: Option<Codec>,
216 #[serde(default, skip_serializing_if = "Option::is_none")]
218 pub hash: Option<HashStep>,
219 #[serde(default, skip_serializing_if = "Option::is_none")]
221 pub cipher: Option<CipherStep>,
222 #[serde(default, rename = "fontMap", skip_serializing_if = "Option::is_none")]
226 pub font_map: Option<std::collections::BTreeMap<String, String>>,
227 #[serde(default, skip_serializing_if = "Option::is_none")]
229 pub cn: Option<CnConvert>,
230 #[serde(default, skip_serializing_if = "Option::is_none")]
232 pub js: Option<String>,
233}
234
// The object form of a rule that selects and extracts a value (`Rule::Leaf`).
//
// JSON keys are camelCase. Every field has a default, so `{}` deserializes to
// `LeafRule::default()`.
// NOTE(review): unlike most structs in this file there is no `deny_unknown_fields`,
// so misspelled keys are silently ignored; combined with `Rule` being untagged this
// makes `Leaf` a catch-all for any JSON object. Confirm this is intended.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct LeafRule {
    // Selector kind; `Via::Css` when omitted. Always serialized.
    #[serde(default)]
    pub via: Via,
    // Selector expression; not serialized when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub select: Option<String>,
    // Optional signed index; not serialized when `None`.
    // NOTE(review): the meaning of negative values is up to the evaluator.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub index: Option<i64>,
    // What to extract; `Extract::Op(ExtractOp::Text)` when omitted. Always serialized.
    #[serde(default)]
    pub extract: Extract,
    // Clean steps, in list order; not serialized when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub clean: Vec<CleanStep>,
}
251
// A value-producing rule.
//
// Untagged: serde tries the variants in declaration order and keeps the first one
// that deserializes, so the discriminating key (`firstOf`, `concat`, `literal`,
// `template`, `js`) selects the variant and the ORDER BELOW IS SIGNIFICANT.
// `Leaf` must stay last: `LeafRule` has only defaulted fields and does not reject
// unknown keys, so it accepts any JSON object that the earlier variants refused.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(untagged)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Rule {
    // { "firstOf": [rule, ...] } — a list of alternative rules.
    FirstOf {
        #[serde(rename = "firstOf")]
        first_of: Vec<Rule>,
    },
    // { "concat": [rule, ...], "join": "sep" } — `join` is the empty string when omitted.
    Concat {
        concat: Vec<Rule>,
        #[serde(default)]
        join: String,
    },
    // { "literal": "text" }
    Literal { literal: String },
    // { "template": "..." } — the fixtures use `{{name}}` placeholders.
    Template { template: String },
    // { "js": "source" }
    Js { js: String },
    // Any other object, read as a `LeafRule`.
    Leaf(LeafRule),
}
281
// A URL-like value given either as a plain JSON string or as a `Rule` object.
//
// Untagged: a JSON string becomes `Str`; anything else is tried as `Rule`.
// `Rule` is boxed, keeping this enum small.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(untagged)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum UrlOrRule {
    Str(String),
    Rule(Box<Rule>),
}
290
// Response character set selected by `Http::charset`.
//
// Wire format: kebab-case variant name, which for these single-word variants is
// "auto", "utf8", "gbk", "gb18030", "big5". `Auto` is the `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Charset {
    #[default]
    Auto,
    Utf8,
    Gbk,
    Gb18030,
    Big5,
}
305
// Retry settings (`Http::retry`). Keys: `max`, `backoffMs`; unknown keys are rejected.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Retry {
    // Retry count limit; 0 when omitted.
    #[serde(default)]
    pub max: u32,
    // Back-off delay, in milliseconds per the field name; 0 when omitted.
    #[serde(default)]
    pub backoff_ms: u64,
}
316
// Rate-limit settings (`Http::rate_limit`). Keys: `maxCount`, `perMs`.
//
// Both keys are required when the object is present (no `#[serde(default)]`);
// unknown keys are rejected. The derived `Default` (both 0) is only reachable
// from Rust code, not from JSON.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct RateLimit {
    // Count limit (presumably per `per_ms` window — confirm in the limiter).
    pub max_count: u64,
    // Window length, in milliseconds per the field name.
    pub per_ms: u64,
}
325
// Fetch backend selected by `Http::fetcher`.
//
// Wire format: lowercase variant name ("auto", "reqwest", "browser").
// `Auto` is the `Default`.
// NOTE(review): how `Auto` chooses a backend is decided elsewhere.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum FetchMode {
    #[default]
    Auto,
    Reqwest,
    Browser,
}
340
// Source-wide HTTP settings (`BookSource::http`).
//
// JSON keys are camelCase (`rateLimit`, ...); unknown keys are rejected. Every
// field is optional in JSON, so `{}` or an omitted `http` yields `Http::default()`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Http {
    // Header name -> value; empty when omitted. Always serialized.
    #[serde(default)]
    pub headers: HashMap<String, String>,
    // Cookie name -> value; empty when omitted. Always serialized.
    #[serde(default)]
    pub cookies: HashMap<String, String>,
    // List of URLs (per the fixture); empty when omitted.
    // NOTE(review): presumably fetched before real requests — confirm in the fetcher.
    #[serde(default)]
    pub warmup: Vec<String>,
    // Response charset; `Charset::Auto` when omitted.
    #[serde(default)]
    pub charset: Charset,
    // Optional timeout; not serialized when `None`.
    // NOTE(review): unit is not stated here (the fixture uses 15000, suggesting ms).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub timeout: Option<u64>,
    // Optional retry settings; not serialized when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub retry: Option<Retry>,
    // Optional rate-limit settings; not serialized when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rate_limit: Option<RateLimit>,
    // Fetch backend; `FetchMode::Auto` when omitted.
    #[serde(default)]
    pub fetcher: FetchMode,
}
366
// Widget kind of a login form row (`RowUi::ui_type`).
//
// Wire format: lowercase variant name ("text", "password", "select", "toggle").
// `Text` is the `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum RowUiType {
    #[default]
    Text,
    Password,
    Select,
    Toggle,
}
382
// One row of the login form (`BookSource::login_ui`).
//
// Keys: `name` (required), `type`, `options`; unknown keys are rejected.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct RowUi {
    // Row name.
    pub name: String,
    // Widget kind, serialized under the key `type` (a Rust keyword, hence the
    // different field name); `RowUiType::Text` when omitted.
    #[serde(rename = "type", default)]
    pub ui_type: RowUiType,
    // List of option strings; not serialized when empty.
    // NOTE(review): presumably the choices for `Select` rows — confirm in the UI code.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub options: Vec<String>,
}
396
// HTTP method of a `Request` or `PreStep`.
//
// Wire format: UPPERCASE variant name ("GET", "POST"). `Get` is the `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Method {
    #[default]
    Get,
    Post,
}
406
// Scope tag of a captured variable (`Capture::scope`).
//
// Wire format: lowercase variant name ("chapter", "book", "source").
// `Chapter` is the `Default`.
// NOTE(review): the lifetime each scope implies is defined by the variable store,
// which is not part of this file.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum VarScope {
    #[default]
    Chapter,
    Book,
    Source,
}
425
// A named value captured by a `PreStep`.
//
// Keys: `name`, `value` (both required), `scope`; unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Capture {
    // Variable name.
    pub name: String,
    // Rule that produces the variable's value.
    pub value: Rule,
    // Scope tag; `VarScope::Chapter` when omitted. Always serialized.
    #[serde(default)]
    pub scope: VarScope,
}
440
// One entry of a `prelude` list: a request description plus variable captures.
//
// Keys are camelCase (`skipIfPresent`); unknown keys are rejected. Only `url`
// is required.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct PreStep {
    // Request URL, as a plain string or a rule.
    pub url: UrlOrRule,
    // HTTP method; `Method::Get` when omitted.
    #[serde(default)]
    pub method: Method,
    // Optional request body; not serialized when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub body: Option<UrlOrRule>,
    // Header name -> value; empty when omitted. Always serialized.
    #[serde(default)]
    pub headers: HashMap<String, String>,
    // Captures; not serialized when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub capture: Vec<Capture>,
    // List of names; not serialized when empty.
    // NOTE(review): presumably the step is skipped when these variables already
    // exist — confirm in the prelude runner.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub skip_if_present: Vec<String>,
}
463
// Request description of an operation (used by `SearchOp::request`).
//
// Keys are camelCase; unknown keys are rejected. Only `url` is required.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Request {
    // Request URL, as a plain string or a rule.
    pub url: UrlOrRule,
    // HTTP method; `Method::Get` when omitted.
    #[serde(default)]
    pub method: Method,
    // Optional request body; not serialized when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub body: Option<UrlOrRule>,
    // Header name -> value; empty when omitted. Always serialized.
    #[serde(default)]
    pub headers: HashMap<String, String>,
    // Name -> rule map; not serialized when empty. `BTreeMap` iterates in key order.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub vars: BTreeMap<String, Rule>,
}
483
// Per-book field rules (the `item` of `SearchOp` / `ExploreOp`; also produced by
// `BookInfoOp::as_book_rules`).
//
// Keys are camelCase (`bookUrl`, `lastChapter`, `tocUrl`, `wordCount`, ...);
// unknown keys are rejected. Every field is an optional `Rule`, omitted from the
// serialized form when `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct BookRules {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub book_url: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub author: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cover: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub intro: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kind: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_chapter: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub toc_url: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub word_count: Option<Rule>,
}
511
// The `bookInfo` section of a source: the same nine optional field rules as
// `BookRules`, plus an optional `prelude`.
//
// The fields are spelled out rather than nested, so they sit directly under
// `bookInfo` in JSON. Keys are camelCase; unknown keys are rejected; `{}` is valid.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct BookInfoOp {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub book_url: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub author: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cover: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub intro: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kind: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_chapter: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub toc_url: Option<Rule>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub word_count: Option<Rule>,
    // Pre-steps for this operation; not serialized when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prelude: Vec<PreStep>,
}
541
542impl BookInfoOp {
543 pub fn as_book_rules(&self) -> BookRules {
545 BookRules {
546 book_url: self.book_url.clone(),
547 name: self.name.clone(),
548 author: self.author.clone(),
549 cover: self.cover.clone(),
550 intro: self.intro.clone(),
551 kind: self.kind.clone(),
552 last_chapter: self.last_chapter.clone(),
553 toc_url: self.toc_url.clone(),
554 word_count: self.word_count.clone(),
555 }
556 }
557}
558
// The `search` section of a source.
//
// Keys: `prelude`, `request`, `list`, `item`; unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct SearchOp {
    // Pre-steps for this operation; not serialized when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prelude: Vec<PreStep>,
    // Required request description.
    pub request: Request,
    // Required rule for the result list.
    pub list: Rule,
    // Required per-item field rules.
    pub item: BookRules,
}
571
// One entry of `ExploreOp::categories`: a title and its URL (string or rule).
// Both keys are required; unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Category {
    pub title: String,
    pub url: UrlOrRule,
}
580
// The `explore` section of a source.
//
// Keys: `prelude`, `categories`, `list`, `item`; unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct ExploreOp {
    // Pre-steps for this operation; not serialized when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prelude: Vec<PreStep>,
    // Required category list (may be empty).
    pub categories: Vec<Category>,
    // Required rule for the result list.
    pub list: Rule,
    // Required per-item field rules.
    pub item: BookRules,
}
593
/// Serde default for the `max_pages` fields of `TocRules` and `ContentRules`:
/// the value used when `maxPages` is absent from the JSON.
fn default_max_pages() -> u32 {
    const DEFAULT_MAX_PAGES: u32 = 100;
    DEFAULT_MAX_PAGES
}
597
// The `toc` section of a source.
//
// Keys are camelCase (`isVolume`, `nextPage`, `maxPages`); unknown keys are
// rejected. `list`, `name` and `url` are required.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct TocRules {
    // Pre-steps for this operation; not serialized when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prelude: Vec<PreStep>,
    // Rule for the list of entries.
    pub list: Rule,
    // Rule for an entry's name.
    pub name: Rule,
    // Rule for an entry's URL.
    pub url: Rule,
    // Optional rule; not serialized when `None`.
    // NOTE(review): presumably marks an entry as a volume heading — confirm in the parser.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub is_volume: Option<Rule>,
    // Optional rule for the next page; not serialized when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub next_page: Option<Rule>,
    // Page limit; `default_max_pages()` (100) when omitted. Always serialized.
    #[serde(default = "default_max_pages")]
    pub max_pages: u32,
}
616
// The `content` section of a source.
//
// Keys are camelCase (`nextPage`, `maxPages`); unknown keys are rejected.
// Only `value` is required.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct ContentRules {
    // Pre-steps for this operation; not serialized when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prelude: Vec<PreStep>,
    // Rule that produces the content.
    pub value: Rule,
    // Optional rule for the next page; not serialized when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub next_page: Option<Rule>,
    // Page limit; `default_max_pages()` (100) when omitted. Always serialized.
    #[serde(default = "default_max_pages")]
    pub max_pages: u32,
}
631
// Expected values attached to a `Sample`.
//
// Keys are camelCase (`minChapters`, `minContentChars`). All fields are optional
// and omitted from the serialized form when `None`. Unlike its neighbours this
// struct does not set `deny_unknown_fields`, so extra keys are ignored.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Expect {
    // Expected book name.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    // Expected lower bound on the chapter count (per the field name).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub min_chapters: Option<usize>,
    // Expected volume count.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub volumes: Option<usize>,
    // Expected lower bound on content length in characters (per the field name).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub min_content_chars: Option<usize>,
}
648
// One entry of `BookSource::samples`: a book URL plus expected values.
// Keys: `bookUrl` (required), `expect`; unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Sample {
    // Book URL; the fixture uses a site-relative path.
    pub book_url: String,
    // Expected values; `Expect::default()` (nothing expected) when omitted.
    #[serde(default)]
    pub expect: Expect,
}
658
// Root document of a book source.
//
// Keys are camelCase (`loginUrl`, `loginUi`, `loginCheckJs`, `enabledCookieJar`,
// `concurrentRate`, `bookInfo`, ...); unknown top-level keys are rejected.
// Required keys: `schema`, `name`, `url`, `bookInfo`, `toc`, `content`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct BookSource {
    // Schema identifier string. Deserialization does not compare it with
    // `SCHEMA_ID`; any string is accepted here.
    pub schema: String,
    // Display name of the source.
    pub name: String,
    // Group label; not serialized when empty.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub group: String,
    // Site URL of the source.
    pub url: String,
    // HTTP settings; `Http::default()` when omitted. Always serialized.
    #[serde(default)]
    pub http: Http,
    // Login entry: either a URL or inline script marked with `@js:` / `<js>`
    // (see `get_login_js`). Not serialized when empty.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub login_url: String,
    // Login form rows; not serialized when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub login_ui: Vec<RowUi>,
    // Script text; not serialized when empty.
    // NOTE(review): presumably checks login state — confirm in the login code.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub login_check_js: String,
    // Cookie-jar flag; `false` when omitted. Always serialized.
    #[serde(default)]
    pub enabled_cookie_jar: bool,
    // Free-form rate string; not serialized when empty.
    // NOTE(review): its format is parsed elsewhere.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub concurrent_rate: String,
    // Optional search operation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub search: Option<SearchOp>,
    // Optional explore operation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub explore: Option<ExploreOp>,
    // Required book-info operation (`{}` is accepted).
    pub book_info: BookInfoOp,
    // Required table-of-contents rules.
    pub toc: TocRules,
    // Required content rules.
    pub content: ContentRules,
    // Samples with expected values; not serialized when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub samples: Vec<Sample>,
}
701
/// Schema identifier that v2 book sources carry in their `schema` field.
///
/// The loaders in this file do not enforce it; callers that care must compare
/// `BookSource::schema` against this constant themselves.
pub const SCHEMA_ID: &str = "trnovel-booksource/v2";
704
705impl BookSource {
706 pub fn from_json(s: &str) -> Result<Self, ConfigError> {
708 Ok(serde_json::from_str(s)?)
709 }
710
711 pub fn has_login(&self) -> bool {
714 !self.login_url.trim().is_empty() || !self.login_ui.is_empty()
715 }
716
717 pub fn get_login_js(&self) -> Option<&str> {
720 let s = self.login_url.trim();
721 if let Some(js) = s.strip_prefix("@js:") {
722 Some(js.trim())
723 } else if let Some(rest) = s.strip_prefix("<js>") {
724 Some(rest.strip_suffix("</js>").unwrap_or(rest).trim())
725 } else {
726 None
727 }
728 }
729
730 pub fn from_value_many(value: serde_json::Value) -> Result<Vec<Self>, ConfigError> {
732 if value.is_array() {
733 Ok(serde_json::from_value(value)?)
734 } else {
735 Ok(vec![serde_json::from_value(value)?])
736 }
737 }
738
739 pub fn from_path(path: &str) -> Result<Vec<Self>, super::error::BookSourceError> {
741 let text = std::fs::read_to_string(path).map_err(ConfigError::Io)?;
742 let value = serde_json::from_str(&text).map_err(ConfigError::Json)?;
743 Ok(Self::from_value_many(value)?)
744 }
745
746 pub async fn from_url(url: &str) -> Result<Vec<Self>, super::error::BookSourceError> {
748 use super::error::FetchError;
749 let text = reqwest::get(url)
750 .await
751 .map_err(FetchError::Http)?
752 .error_for_status()
754 .map_err(FetchError::Http)?
755 .text()
756 .await
757 .map_err(FetchError::Http)?;
758 let value = serde_json::from_str(&text).map_err(ConfigError::Json)?;
759 Ok(Self::from_value_many(value)?)
760 }
761}
762
#[cfg(test)]
mod tests {
    use super::*;

    // Complete v2 fixture shared by most tests: it has `http`, `search`, `explore`,
    // `bookInfo`, `toc`, `content` and `samples`, but no login fields and no
    // `prelude` / `vars` anywhere.
    const BILIXS_V2: &str = r#"{
      "schema": "trnovel-booksource/v2",
      "name": "哔哩小说",
      "group": "测试",
      "url": "https://www.bilixs.com",
      "http": {
        "headers": { "User-Agent": "Mozilla/5.0" },
        "cookies": {},
        "warmup": ["https://www.bilixs.com/"],
        "charset": "auto",
        "timeout": 15000,
        "retry": { "max": 2, "backoffMs": 500 }
      },
      "search": {
        "request": { "url": { "template": "{{base}}/search.html?searchkey={{key}}" }, "method": "GET" },
        "list": { "via": "css", "select": ".module-item" },
        "item": {
          "name": { "via": "css", "select": ".module-item-title", "extract": "text" },
          "tocUrl": { "via": "css", "select": ".module-item-title", "extract": { "attr": "href" } }
        }
      },
      "explore": {
        "categories": [ { "title": "最近更新", "url": { "template": "{{base}}/book/lastupdate_0_1_0_0_0_0_0_{{page}}_0.html" } } ],
        "list": { "via": "css", "select": ".module-item" },
        "item": { "name": { "via": "css", "select": ".module-item-title", "extract": "text" } }
      },
      "bookInfo": {
        "name": { "via": "css", "select": "[property=\"og:novel:book_name\"]", "extract": { "attr": "content" } },
        "cover": { "via": "css", "select": "[property=\"og:image\"]", "extract": { "attr": "content" } },
        "kind": { "concat": [
          { "via": "css", "select": "[property=\"og:novel:tags\"]", "extract": { "attr": "content" } },
          { "via": "css", "select": "[property=\"og:novel:status\"]", "extract": { "attr": "content" } }
        ], "join": " · " },
        "tocUrl": { "via": "css", "select": "[property=\"og:novel:read_url\"]", "extract": { "attr": "content" } }
      },
      "toc": {
        "list": { "via": "css", "select": ".box > h2.module-title.type, .box a.module-row-text" },
        "name": { "firstOf": [
          { "via": "css", "select": ".module-row-title", "extract": "text" },
          { "via": "css", "select": "h2", "extract": "text" }
        ] },
        "url": { "via": "css", "select": "a", "extract": { "attr": "href" } },
        "isVolume": { "via": "css", "select": "h2", "extract": "text" },
        "maxPages": 1
      },
      "content": {
        "value": { "via": "css", "select": ".article-content", "extract": "html",
          "clean": [ { "regex": "请收藏本站[^<\\n]*", "replace": "" }, { "trim": true } ] }
      },
      "samples": [
        { "bookUrl": "/novel/guzhenren.html", "expect": { "name": "蛊真人", "volumes": 8, "minChapters": 2000 } }
      ]
    }"#;

    // The fixture parses, and its schema id and name are read back unchanged.
    #[test]
    fn parses_v2_book_source() {
        let bs = BookSource::from_json(BILIXS_V2).expect("应解析 v2 书源");
        assert_eq!(bs.schema, SCHEMA_ID);
        assert_eq!(bs.name, "哔哩小说");
    }

    // `{ "firstOf": [...] }` is read as `Rule::FirstOf` with both alternatives kept.
    #[test]
    fn toc_name_is_firstof_with_two_leaves() {
        let bs = BookSource::from_json(BILIXS_V2).unwrap();
        match &bs.toc.name {
            Rule::FirstOf { first_of } => assert_eq!(first_of.len(), 2),
            other => panic!("toc.name 应为 firstOf,实际 {other:?}"),
        }
    }

    // A plain `via`/`select` object falls through to `Rule::Leaf`.
    #[test]
    fn toc_is_volume_is_leaf_css_h2() {
        let bs = BookSource::from_json(BILIXS_V2).unwrap();
        let iv = bs.toc.is_volume.as_ref().expect("isVolume 应存在");
        match iv {
            Rule::Leaf(l) => {
                assert_eq!(l.via, Via::Css);
                assert_eq!(l.select.as_deref(), Some("h2"));
            }
            other => panic!("isVolume 应为叶子,实际 {other:?}"),
        }
    }

    // An object-valued `url` becomes `UrlOrRule::Rule` wrapping a template rule.
    #[test]
    fn search_url_is_template_rule() {
        let bs = BookSource::from_json(BILIXS_V2).unwrap();
        let req = &bs.search.as_ref().unwrap().request;
        match &req.url {
            UrlOrRule::Rule(r) => assert!(matches!(**r, Rule::Template { .. })),
            other => panic!("search.request.url 应为模板规则,实际 {other:?}"),
        }
    }

    // `"extract": { "attr": "content" }` is read as `Extract::Attr`.
    #[test]
    fn book_info_cover_extracts_attr() {
        let bs = BookSource::from_json(BILIXS_V2).unwrap();
        match bs.book_info.cover.as_ref().unwrap() {
            Rule::Leaf(l) => assert_eq!(
                l.extract,
                Extract::Attr {
                    attr: "content".into()
                }
            ),
            other => panic!("cover 应为属性抽取叶子,实际 {other:?}"),
        }
    }

    // `http` sub-fields, including the camelCase `backoffMs`, are read.
    #[test]
    fn http_cookies_and_warmup_parsed() {
        let bs = BookSource::from_json(BILIXS_V2).unwrap();
        assert_eq!(bs.http.warmup, vec!["https://www.bilixs.com/"]);
        assert_eq!(bs.http.charset, Charset::Auto);
        assert_eq!(bs.http.retry.as_ref().unwrap().backoff_ms, 500);
    }

    // Sample expectations are read from their camelCase keys.
    #[test]
    fn sample_expectations_parsed() {
        let bs = BookSource::from_json(BILIXS_V2).unwrap();
        let s = &bs.samples[0];
        assert_eq!(s.expect.volumes, Some(8));
        assert_eq!(s.expect.min_chapters, Some(2000));
    }

    // Serializing and re-parsing yields an equal value.
    #[test]
    fn round_trips_through_json() {
        let bs = BookSource::from_json(BILIXS_V2).unwrap();
        let json = serde_json::to_string(&bs).unwrap();
        let bs2 = BookSource::from_json(&json).unwrap();
        assert_eq!(bs, bs2);
    }

    // Misspelling the first `"name":` key (the top-level one) must fail parsing.
    #[test]
    fn rejects_unknown_top_level_field() {
        let bad = BILIXS_V2.replacen("\"name\":", "\"nmae\":", 1);
        assert!(
            BookSource::from_json(&bad).is_err(),
            "拼错字段应被 deny_unknown_fields 拒绝"
        );
    }

    // `has_login` is true for a non-blank `login_url` (URL or script) or a
    // non-empty `login_ui`, and false for a whitespace-only `login_url`.
    #[test]
    fn has_login_when_login_url_or_login_ui_present() {
        let mut bs = BookSource::from_json(BILIXS_V2).unwrap();
        assert!(!bs.has_login(), "默认无 loginUrl/loginUi 不需登录");
        bs.login_url = "https://site/login".into();
        assert!(bs.has_login());
        bs.login_url = "@js:function login(){}".into();
        assert!(bs.has_login());
        bs.login_url = " ".into();
        assert!(!bs.has_login(), "纯空白 loginUrl 视为不需登录");
        bs.login_ui = vec![RowUi {
            name: "用户名".into(),
            ..Default::default()
        }];
        assert!(bs.has_login(), "loginUi 非空应计入登录入口判定");
    }

    // `get_login_js` strips `@js:` and `<js>…</js>` markers and trims the result.
    #[test]
    fn get_login_js_strips_prefixes() {
        let mut bs = BookSource::from_json(BILIXS_V2).unwrap();
        bs.login_url = "@js: function login(){} ".into();
        assert_eq!(bs.get_login_js(), Some("function login(){}"));
        bs.login_url = "<js>BODY</js>".into();
        assert_eq!(bs.get_login_js(), Some("BODY"));
        bs.login_url = "<js> A </js>".into();
        assert_eq!(bs.get_login_js(), Some("A"));
        // A missing closing tag is tolerated.
        bs.login_url = "<js>BODY".into();
        assert_eq!(bs.get_login_js(), Some("BODY"));
        // A plain URL is not a script.
        bs.login_url = "https://site/login".into();
        assert_eq!(bs.get_login_js(), None);
        bs.login_url = "".into();
        assert_eq!(bs.get_login_js(), None);
    }

    // Preludes with captures parse, an omitted `scope` defaults to `Chapter`,
    // and the whole document survives a serialize/parse round trip.
    #[test]
    fn parses_prelude_capture_and_round_trips() {
        let json = r#"{
          "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
          "search":{
            "prelude":[{
              "url":{"template":"{{base}}/prepare"},
              "capture":[{"name":"token","value":{"via":"json","select":"$.token"},"scope":"source"}],
              "skipIfPresent":["token"]
            }],
            "request":{"url":{"template":"{{base}}/s?token={{token}}"}},
            "list":{"via":"css","select":".i"},
            "item":{"name":{"via":"css","select":".t","extract":"text"}}
          },
          "bookInfo":{"prelude":[{"url":{"template":"{{base}}/p"},"capture":[{"name":"csrf","value":{"via":"raw"}}]}]},
          "toc":{"list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a","extract":{"attr":"href"}}},
          "content":{"value":{"via":"css","select":".c"}}
        }"#;
        let bs = BookSource::from_json(json).expect("应解析含 prelude 的书源");
        let sp = &bs.search.as_ref().unwrap().prelude;
        assert_eq!(sp.len(), 1);
        assert_eq!(sp[0].capture[0].name, "token");
        assert_eq!(sp[0].capture[0].scope, VarScope::Source);
        assert_eq!(sp[0].skip_if_present, vec!["token".to_string()]);
        // No explicit scope in the fixture -> the default scope.
        assert_eq!(bs.book_info.prelude[0].capture[0].scope, VarScope::Chapter);
        let s = serde_json::to_string(&bs).unwrap();
        assert_eq!(BookSource::from_json(&s).unwrap(), bs);
    }

    // A misspelled key inside a prelude step (`captuer`) must fail parsing.
    #[test]
    fn prestep_rejects_unknown_field() {
        let bad = r#"{
          "schema":"trnovel-booksource/v2","name":"t","url":"https://x",
          "toc":{"prelude":[{"url":{"template":"{{base}}/p"},"captuer":[]}],
            "list":{"via":"css","select":"a"},"name":{"via":"css","select":"a"},"url":{"via":"css","select":"a"}},
          "bookInfo":{},
          "content":{"value":{"via":"css","select":".c"}}
        }"#;
        assert!(
            BookSource::from_json(bad).is_err(),
            "PreStep 拼错字段(captuer)应被 deny_unknown_fields 拒"
        );
    }

    // A source without preludes/vars serializes without any of those keys,
    // i.e. the `skip_serializing_if` guards on the optional collections work.
    #[test]
    fn existing_source_serializes_without_new_fields() {
        let bs = BookSource::from_json(BILIXS_V2).unwrap();
        let json = serde_json::to_string(&bs).unwrap();
        assert!(!json.contains("prelude"), "无前置链不应序列化 prelude");
        assert!(!json.contains("\"vars\""), "空 vars 不应序列化");
        assert!(!json.contains("skipIfPresent"));
        assert!(!json.contains("\"capture\""));
    }
}
1004
// Compiled only for test builds with the `schema` feature enabled.
#[cfg(all(test, feature = "schema"))]
mod schema_sync {
    // Guards against drift between the Rust config types and the committed JSON
    // Schema: the schema generated from `crate::BookSource` must equal
    // `../book-source.schema.json` (compared after trimming both sides).
    //
    // Anything that feeds the derive changes the generated text — fields, serde
    // attributes and, with schemars, `///` doc comments on the types (emitted as
    // descriptions). After such a change, regenerate the file with the command in
    // the assertion message.
    #[test]
    fn schema_is_in_sync() {
        let generated =
            serde_json::to_string_pretty(&schemars::schema_for!(crate::BookSource)).unwrap();
        let committed = include_str!("../book-source.schema.json");
        assert_eq!(
            generated.trim(),
            committed.trim(),
            "book-source.schema.json 与配置类型不同步;请重新生成:\n \
             cargo run -p parse-book-source --features schema --example gen_schema \
             > crates/parse-book-source/book-source.schema.json"
        );
    }
}