Skip to main content

parse_book_source/
verify.rs

1//! 样例校验回路:对 `samples` 跑完整流程并断言可执行不变量,返回结构化报告。
2//! 同一套断言用于生成期校验与运行期监控(见 design D5)。
3
4use super::engine::Engine;
5use super::error::{BookSourceError, Result};
6use super::source::Sample;
7
8/// 把取页/求值错误转为简短诊断文案;**反爬挑战给精确提示**而非笼统失败。
9fn err_detail(e: &BookSourceError) -> String {
10    if e.is_challenge() {
11        "被反爬挑战拦截(如 Cloudflare),需浏览器辅助或改用浏览".into()
12    } else {
13        e.to_string()
14    }
15}
16
// NOTE(review): this test module is declared before the non-test items of the
// file; clippy's `items_after_test_module` lint flags that layout. Consider
// moving the module to the end of the file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::FetchError;
    use crate::fetch::{FetchRequest, Fetcher};
    use crate::source::BookSource;
    use async_trait::async_trait;
    use std::sync::Arc;

    /// Answers every URL with the same combined HTML (og:meta, table of
    /// contents, book cards and chapter body all in one page), so that
    /// book_info/toc/content/explore can each pick out what they need and the
    /// whole pipeline can be exercised offline.
    struct MockFetcher(String);

    #[async_trait]
    impl Fetcher for MockFetcher {
        async fn fetch(&self, _req: FetchRequest) -> std::result::Result<String, FetchError> {
            Ok(self.0.clone())
        }
    }

    /// Answers every URL with a "challenged" error, simulating a site that is
    /// entirely behind a Cloudflare-style anti-bot challenge.
    struct ChallengeFetcher;

    #[async_trait]
    impl Fetcher for ChallengeFetcher {
        async fn fetch(&self, _req: FetchRequest) -> std::result::Result<String, FetchError> {
            Err(FetchError::Challenged("Cloudflare/反爬挑战 @ test".into()))
        }
    }

    const HTML: &str = r#"<html><head>
        <meta property="og:novel:book_name" content="测试书">
        <meta property="og:novel:read_url" content="/toc">
      </head><body>
        <div class="module-item"><a class="module-item-title" href="/b1">书一</a></div>
        <div class="box">
          <h2 class="module-title type">第一卷</h2>
          <div class="module-row-info"><a class="module-row-text" href="/c1"><div class="module-row-title"><span>第一章</span></div></a></div>
        </div>
        <div class="article-content"><p>正文内容。</p></div>
      </body></html>"#;

    const SOURCE: &str = r#"{
      "schema":"trnovel-booksource/v2","name":"mock","url":"https://x",
      "search":{"request":{"url":{"template":"{{base}}/s?q={{key}}"}},
                "list":{"via":"css","select":".module-item"},
                "item":{"bookUrl":{"via":"css","select":".module-item-title","extract":{"attr":"href"}},"name":{"via":"css","select":".module-item-title","extract":"text"}}},
      "explore":{"categories":[{"title":"全部","url":{"template":"{{base}}/all_{{page}}"}}],
                 "list":{"via":"css","select":".module-item"},
                 "item":{"bookUrl":{"via":"css","select":".module-item-title","extract":{"attr":"href"}},"name":{"via":"css","select":".module-item-title","extract":"text"}}},
      "bookInfo":{"name":{"via":"css","select":"[property=\"og:novel:book_name\"]","extract":{"attr":"content"}},
                  "tocUrl":{"via":"css","select":"[property=\"og:novel:read_url\"]","extract":{"attr":"content"}}},
      "toc":{"list":{"via":"css","select":".box > h2.module-title.type, .box a.module-row-text"},
             "name":{"firstOf":[{"via":"css","select":".module-row-title","extract":"text"},{"via":"css","select":"h2","extract":"text"}]},
             "url":{"via":"css","select":"a","extract":{"attr":"href"}},
             "isVolume":{"via":"css","select":"h2","extract":"text"},"maxPages":1},
      "content":{"value":{"via":"css","select":".article-content","extract":"html"}},
      "samples":[{"bookUrl":"/b1","expect":{"name":"测试书"}}]
    }"#;

    /// Parses the shared `SOURCE` fixture; a parse failure is a broken fixture.
    fn mock_source() -> BookSource {
        BookSource::from_json(SOURCE).unwrap()
    }

    #[tokio::test]
    async fn diagnose_all_capabilities_pass_offline() {
        let fetcher = Arc::new(MockFetcher(HTML.to_owned()));
        let engine = Engine::with_fetcher(mock_source(), fetcher);

        let report = diagnose(&engine).await;

        assert!(report.healthy(), "应全部通过,实际: {report}");
        // Six checks: config / explore / book info / TOC / content / search.
        assert_eq!(report.checks.len(), 6);
        let toc_check = report
            .checks
            .iter()
            .find(|check| check.name == "目录")
            .unwrap();
        assert_eq!(toc_check.status, CheckStatus::Pass);
        assert!(toc_check.detail.contains("1 卷 / 1 章"));
    }

    #[tokio::test]
    async fn verify_sample_offline() {
        let source = mock_source();
        let fetcher = Arc::new(MockFetcher(HTML.to_owned()));
        let engine = Engine::with_fetcher(source.clone(), fetcher);

        let outcome = verify_sample(&engine, &source.samples[0]).await.unwrap();

        assert!(outcome.passed, "failures: {:?}", outcome.failures);
        assert_eq!(outcome.name, "测试书");
        assert_eq!(outcome.chapters, 1);
        assert_eq!(outcome.volumes, 1);
    }

    #[tokio::test]
    async fn diagnose_reports_challenge_precisely() {
        let engine = Engine::with_fetcher(mock_source(), Arc::new(ChallengeFetcher));

        let report = diagnose(&engine).await;

        assert!(!report.healthy(), "被挑战应不健康");
        // A challenged check must carry the specific anti-bot hint rather than
        // a generic error string.
        let has_challenge_hint = report
            .checks
            .iter()
            .any(|check| check.detail.contains("反爬挑战"));
        assert!(has_challenge_hint, "应有精确反爬提示,实际: {report}");
    }
}
112
/// Outcome of verifying one sample (see `verify_sample`).
#[derive(Clone, Debug, Default)]
pub struct VerifyReport {
    /// `true` when every invariant held, i.e. `failures` is empty.
    pub passed: bool,
    /// Human-readable description of each violated invariant
    /// (expected vs. actual).
    pub failures: Vec<String>,
    /// Book name as extracted from the book-info page.
    pub name: String,
    /// Number of chapters found in the table of contents.
    pub chapters: usize,
    /// Number of volumes found in the table of contents.
    pub volumes: usize,
    /// Length of the first chapter's content, counted in `char`s
    /// (0 when no chapter was available to fetch).
    pub content_chars: usize,
}
125
126/// 对单个样例跑 book_info → toc → 首章 content,并校验不变量。
127pub async fn verify_sample(engine: &Engine, sample: &Sample) -> Result<VerifyReport> {
128    let mut report = VerifyReport::default();
129
130    let info = engine.book_info(&sample.book_url).await?;
131    report.name = info.name.clone();
132    if info.name.trim().is_empty() {
133        report.failures.push("bookInfo.name 为空".into());
134    }
135
136    let toc_url = if info.toc_url.trim().is_empty() {
137        sample.book_url.clone()
138    } else {
139        info.toc_url.clone()
140    };
141    let toc = engine.toc(&toc_url).await?;
142    report.chapters = toc.chapters.len();
143    report.volumes = toc.volumes.len();
144    if toc.chapters.is_empty() {
145        report.failures.push("目录无章节".into());
146    }
147
148    if let Some(first) = toc.chapters.first() {
149        let content = engine.content(&first.url).await?;
150        report.content_chars = content.chars().count();
151    }
152
153    let e = &sample.expect;
154    if let Some(n) = &e.name
155        && &info.name != n
156    {
157        report
158            .failures
159            .push(format!("name 期望 {:?},实际 {:?}", n, info.name));
160    }
161    if let Some(m) = e.min_chapters
162        && report.chapters < m
163    {
164        report
165            .failures
166            .push(format!("章节数 {} < 期望 {}", report.chapters, m));
167    }
168    if let Some(v) = e.volumes
169        && report.volumes != v
170    {
171        report
172            .failures
173            .push(format!("卷数 {} != 期望 {}", report.volumes, v));
174    }
175    if let Some(c) = e.min_content_chars
176        && report.content_chars < c
177    {
178        report
179            .failures
180            .push(format!("正文 {} 字 < 期望 {}", report.content_chars, c));
181    }
182
183    report.passed = report.failures.is_empty();
184    Ok(report)
185}
186
187// ───────────────────────── 全流程体检(doctor)─────────────────────────
188
/// Status of a single check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CheckStatus {
    /// Healthy (✓).
    Pass,
    /// Broken (✗).
    Fail,
    /// Skipped because the capability is not configured or a prerequisite is
    /// missing (○).
    Skip,
}

impl CheckStatus {
    /// Status glyph used in the rendered report: ✓ / ✗ / ○.
    pub fn symbol(&self) -> char {
        match self {
            CheckStatus::Pass => '✓',
            CheckStatus::Fail => '✗',
            CheckStatus::Skip => '○',
        }
    }
}

/// Result of checking one capability.
#[derive(Debug, Clone)]
pub struct Check {
    /// Short capability label shown in the report.
    pub name: &'static str,
    /// Whether the capability passed, failed or was skipped.
    pub status: CheckStatus,
    /// Free-form explanation (counts on success, reason on failure/skip).
    pub detail: String,
}

impl Check {
    /// Shared constructor behind `pass` / `fail` / `skip`.
    fn new(name: &'static str, status: CheckStatus, detail: impl Into<String>) -> Self {
        Self {
            name,
            status,
            detail: detail.into(),
        }
    }

    /// Builds a passing check.
    fn pass(name: &'static str, detail: impl Into<String>) -> Self {
        Self::new(name, CheckStatus::Pass, detail)
    }

    /// Builds a failing check.
    fn fail(name: &'static str, detail: impl Into<String>) -> Self {
        Self::new(name, CheckStatus::Fail, detail)
    }

    /// Builds a skipped check.
    fn skip(name: &'static str, detail: impl Into<String>) -> Self {
        Self::new(name, CheckStatus::Skip, detail)
    }
}

/// Diagnosis report: one ✓/✗/○ entry per capability.
#[derive(Debug, Clone)]
pub struct DiagnoseReport {
    /// Name of the book source that was diagnosed.
    pub source_name: String,
    /// Check results, in the order they were run.
    pub checks: Vec<Check>,
}

impl DiagnoseReport {
    /// Returns `true` when no check failed (skipped checks do not count as
    /// failures).
    pub fn healthy(&self) -> bool {
        self.checks.iter().all(|c| c.status != CheckStatus::Fail)
    }
}

/// Approximate number of terminal columns `text` occupies.
///
/// Characters in the common East Asian wide/fullwidth ranges count as two
/// columns, everything else as one. This is a heuristic (no combining-mark or
/// emoji handling); it is sufficient for the short check labels rendered here.
fn display_columns(text: &str) -> usize {
    text.chars()
        .map(|ch| {
            let wide = matches!(
                ch,
                '\u{1100}'..='\u{115F}'
                    | '\u{2E80}'..='\u{A4CF}'
                    | '\u{AC00}'..='\u{D7A3}'
                    | '\u{F900}'..='\u{FAFF}'
                    | '\u{FE30}'..='\u{FE4F}'
                    | '\u{FF00}'..='\u{FF60}'
                    | '\u{FFE0}'..='\u{FFE6}'
                    | '\u{20000}'..='\u{3FFFD}'
            );
            if wide { 2 } else { 1 }
        })
        .sum()
}

impl std::fmt::Display for DiagnoseReport {
    /// Renders a header line followed by one `symbol name detail` line per
    /// check, with the detail column aligned across lines.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Width of the name column in terminal columns. `{:<N}` pads by `char`
        // count, which misaligns CJK labels (two columns per char), so the
        // padding is computed from the display width instead. Eight columns
        // keeps two-character CJK labels rendered exactly as before.
        const NAME_COLUMNS: usize = 8;

        writeln!(f, "书源诊断:{}", self.source_name)?;
        for c in &self.checks {
            let pad = NAME_COLUMNS.saturating_sub(display_columns(c.name));
            writeln!(
                f,
                "  {} {}{:pad$} {}",
                c.status.symbol(),
                c.name,
                "",
                c.detail,
                pad = pad
            )?;
        }
        Ok(())
    }
}
266
267/// 全流程体检:逐项跑「配置 / 浏览 / 搜索 / 书详情 / 目录 / 正文」并报告 ✓/✗/○。
268///
269/// 无 `samples` 时,会尝试用浏览/搜索探到的第一本书来测书详情/目录/正文,
270/// 使没有样例的 AI 生成书源也能被全流程验证。
271pub async fn diagnose(engine: &Engine) -> DiagnoseReport {
272    engine.warmup().await;
273    let src = engine.source();
274    let mut checks = Vec::new();
275
276    // 能构造出 Engine 即说明配置已成功解析。
277    checks.push(Check::pass("配置", format!("书源「{}」", src.name)));
278
279    // 浏览(同时探一个可用 book_url 供读取路径在无样例时使用)
280    let mut probe_book_url: Option<String> = None;
281    if src.explore.is_some() {
282        match engine.explore_categories().first() {
283            Some(cat) => match engine.explore(&cat.url, 1, 20).await {
284                Ok(books) if !books.is_empty() => {
285                    probe_book_url = books
286                        .iter()
287                        .find(|b| !b.book_url.is_empty())
288                        .map(|b| b.book_url.clone());
289                    checks.push(Check::pass(
290                        "浏览",
291                        format!("{} 本(分类「{}」)", books.len(), cat.title),
292                    ));
293                }
294                Ok(_) => checks.push(Check::fail("浏览", "结果为空")),
295                Err(e) => checks.push(Check::fail("浏览", err_detail(&e))),
296            },
297            None => checks.push(Check::skip("浏览", "未配置分类")),
298        }
299    } else {
300        checks.push(Check::skip("浏览", "未配置"));
301    }
302
303    // 读取路径(书详情→目录→正文)先于搜索:这是最可靠的链路,
304    // 避免被搜索可能触发的反爬(如 Cloudflare)影响后续请求。
305    let book_url = src
306        .samples
307        .first()
308        .map(|s| s.book_url.clone())
309        .or(probe_book_url);
310    read_path_checks(engine, book_url, &mut checks).await;
311
312    // 搜索最后做(最易触发反爬;失败也不影响其它检查项)
313    if src.search.is_some() {
314        match src.samples.first().and_then(|s| s.expect.name.clone()) {
315            Some(q) => match engine.search(&q, 1, 20).await {
316                Ok(books) if !books.is_empty() => {
317                    checks.push(Check::pass("搜索", format!("「{q}」→ {} 本", books.len())))
318                }
319                Ok(_) => checks.push(Check::fail("搜索", format!("「{q}」无结果"))),
320                Err(e) => checks.push(Check::fail("搜索", err_detail(&e))),
321            },
322            None => checks.push(Check::skip("搜索", "无样例查询词 samples[].expect.name")),
323        }
324    } else {
325        checks.push(Check::skip("搜索", "未配置"));
326    }
327
328    DiagnoseReport {
329        source_name: src.name.clone(),
330        checks,
331    }
332}
333
334/// 读取路径检查:书详情 → 目录 → 正文。把结果追加到 `checks`(失败不 panic、不冒泡)。
335async fn read_path_checks(engine: &Engine, book_url: Option<String>, checks: &mut Vec<Check>) {
336    let Some(book_url) = book_url else {
337        checks.push(Check::skip("书详情", "无 book_url(需 samples 或可浏览)"));
338        checks.push(Check::skip("目录", "无 book_url"));
339        checks.push(Check::skip("正文", "无 book_url"));
340        return;
341    };
342
343    let info = match engine.book_info(&book_url).await {
344        Ok(info) if !info.name.trim().is_empty() => {
345            checks.push(Check::pass("书详情", info.name.clone()));
346            info
347        }
348        Ok(_) => {
349            checks.push(Check::fail("书详情", "name 为空"));
350            checks.push(Check::skip("目录", "书详情失败"));
351            checks.push(Check::skip("正文", "书详情失败"));
352            return;
353        }
354        Err(e) => {
355            checks.push(Check::fail("书详情", err_detail(&e)));
356            checks.push(Check::skip("目录", "书详情失败"));
357            checks.push(Check::skip("正文", "书详情失败"));
358            return;
359        }
360    };
361
362    let toc_url = if info.toc_url.trim().is_empty() {
363        book_url
364    } else {
365        info.toc_url
366    };
367    let first_chapter_url = match engine.toc(&toc_url).await {
368        Ok(toc) if !toc.chapters.is_empty() => {
369            checks.push(Check::pass(
370                "目录",
371                format!("{} 卷 / {} 章", toc.volumes.len(), toc.chapters.len()),
372            ));
373            Some(toc.chapters[0].url.clone())
374        }
375        Ok(_) => {
376            checks.push(Check::fail("目录", "无章节"));
377            None
378        }
379        Err(e) => {
380            checks.push(Check::fail("目录", err_detail(&e)));
381            None
382        }
383    };
384
385    match first_chapter_url {
386        Some(url) => match engine.content(&url).await {
387            Ok(c) if c.trim().chars().count() >= 1 => {
388                checks.push(Check::pass("正文", format!("{} 字", c.chars().count())))
389            }
390            Ok(_) => checks.push(Check::fail("正文", "正文为空")),
391            Err(e) => checks.push(Check::fail("正文", err_detail(&e))),
392        },
393        None => checks.push(Check::skip("正文", "目录无可用章节")),
394    }
395}