use crate::novel::VolumeMarker;
use crate::utils::novel_catch_dir;
use serde::{Deserialize, Serialize};
/// Default upper bound, in characters, for a line to be considered a title.
/// Used both by `TocRuleSet::builtin` and as the serde default for `max_title_len`.
const DEFAULT_MAX_TITLE_LEN: usize = 35;
/// Minimum offset distance between two hits for both to count when scoring a rule
/// in `effective_count`; hits closer than this to the last counted hit are ignored.
/// The unit is whatever the caller of `detect` uses for offsets (byte offsets in
/// this file's tests).
const SCORE_MIN_GAP: usize = 64;
/// A single table-of-contents matching rule.
///
/// Serialized and deserialized with camelCase keys (`isVolume`, `isExclude`,
/// `serialNumber`, ...). Only `name` and `rule` are required when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TocRule {
/// Label of the rule. The detection code in this file never reads it.
pub name: String,
/// Regular-expression source; compiled with `fancy_regex` before matching.
pub rule: String,
/// When true, a matching line is recorded as a volume heading instead of a chapter.
/// Defaults to `false` when absent.
#[serde(default)]
pub is_volume: bool,
/// When true, a matching line is dropped before any volume/chapter rule is tried.
/// Defaults to `false` when absent.
#[serde(default)]
pub is_exclude: bool,
/// Disabled rules are skipped when the rule set is compiled.
/// Defaults to `true` when absent.
#[serde(default = "default_true")]
pub enable: bool,
/// Optional sample text for the rule. Not read anywhere in this file.
#[serde(default)]
pub example: Option<String>,
/// Numeric tag of the rule; the built-in rules use -2..=4. Not read anywhere in
/// this file — presumably an ordering/display key used elsewhere (TODO confirm).
#[serde(default)]
pub serial_number: i32,
}
/// Serde default provider for `TocRule::enable`: a rule is enabled unless the
/// input explicitly says otherwise.
fn default_true() -> bool {
true
}
/// A collection of [`TocRule`]s together with the title-length limit applied
/// before any rule is tried.
///
/// Serialized and deserialized with camelCase keys (`maxTitleLen`, `rules`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TocRuleSet {
/// Lines with more characters than this are never treated as titles.
/// Falls back to `DEFAULT_MAX_TITLE_LEN` when the key is missing from the input.
#[serde(default = "default_max_title_len")]
pub max_title_len: usize,
/// The rules themselves. This key has no serde default, so it must be present
/// in deserialized input.
pub rules: Vec<TocRule>,
}
/// Serde default provider for `TocRuleSet::max_title_len`.
fn default_max_title_len() -> usize {
DEFAULT_MAX_TITLE_LEN
}
/// The default rule set is the built-in one; no user configuration is read here.
impl Default for TocRuleSet {
fn default() -> Self {
Self::builtin()
}
}
impl TocRuleSet {
pub fn builtin() -> Self {
let rule = |name: &str, pat: &str, is_volume: bool, is_exclude: bool, sn: i32| TocRule {
name: name.to_string(),
rule: pat.to_string(),
is_volume,
is_exclude,
enable: true,
example: None,
serial_number: sn,
};
const NUM: &str = r"[0-9〇零一二两三四五六七八九十百千万壹贰叁肆伍陆柒捌玖拾佰仟]";
Self {
max_title_len: DEFAULT_MAX_TITLE_LEN,
rules: vec![
rule(
"排除:卷计数词歧义",
&format!(r"^第?{NUM}{{1,8}}(?:部门|部队|部属|部分|部件|部落)"),
false,
true,
-2,
),
rule(
"排除:节课",
&format!(r"^第{NUM}{{1,8}}节课"),
false,
true,
-1,
),
rule(
"卷/部/篇",
&format!(r"^第{NUM}{{1,8}}[卷部篇]"),
true,
false,
0,
),
rule("上中下卷", r"^[上中下][卷部篇]", true, false, 1),
rule(
"数字章节(章/节/回/话)",
&format!(r"^第{NUM}{{1,12}}[章节回话](?:[ \t、,,::..\-—_~·].*)?$"),
false,
false,
2,
),
rule(
"英文章节",
r"^(?:[Cc]hapter|[Ss]ection|[Pp]art|[Ee]pisode)\s*\d{1,4}",
false,
false,
3,
),
rule(
"特殊章节",
r"^(?:楔子|引子|序章|序言|前言|后记|尾声|终章|完本感言|番外|外传|附录|内容简介|作品相关)",
false,
false,
4,
),
],
}
}
pub fn load() -> Self {
let mut set = Self::builtin();
let Ok(dir) = novel_catch_dir() else {
return set;
};
let path = dir.join("toc_rules.json");
if let Ok(content) = std::fs::read_to_string(&path) {
set.merge_user_json(&content);
}
set
}
pub fn merge_user_json(&mut self, content: &str) -> bool {
match serde_json::from_str::<TocRuleSet>(content) {
Ok(user) => {
self.max_title_len = user.max_title_len;
self.rules.extend(user.rules);
true
}
Err(_) => false,
}
}
}
/// A [`TocRule`] reduced to what matching needs: its compiled pattern and the
/// two classification flags copied from the rule.
struct CompiledRule {
// Compiled form of `TocRule::rule`.
regex: fancy_regex::Regex,
// Copied from `TocRule::is_volume`.
is_volume: bool,
// Copied from `TocRule::is_exclude`.
is_exclude: bool,
}
/// Compiles every enabled rule of `set`, preserving rule order.
///
/// Disabled rules are skipped. A rule whose pattern fails to compile is dropped
/// silently, so one bad (e.g. user-supplied) pattern cannot break detection.
fn compile(set: &TocRuleSet) -> Vec<CompiledRule> {
    let mut compiled = Vec::new();
    for rule in &set.rules {
        if !rule.enable {
            continue;
        }
        let Ok(regex) = fancy_regex::Regex::new(&rule.rule) else {
            // Invalid pattern: ignore this rule and keep going.
            continue;
        };
        compiled.push(CompiledRule {
            regex,
            is_volume: rule.is_volume,
            is_exclude: rule.is_exclude,
        });
    }
    compiled
}
/// Strips the parts of a raw line that are irrelevant for title matching.
///
/// All trailing whitespace (including the line terminator) is removed, but at the
/// front only ASCII spaces and tabs are removed; any other leading character,
/// whitespace or not, is kept.
fn clean_line(line: &str) -> &str {
    let without_tail = line.trim_end();
    without_tail.trim_start_matches(|c: char| matches!(c, ' ' | '\t'))
}
/// Cheap pre-filter deciding whether a cleaned line may be a heading at all.
///
/// Returns `false` when the line
/// * is empty,
/// * has more than `max_title_len` characters (Unicode scalar values, not bytes), or
/// * ends with a full stop, which marks an ordinary sentence rather than a title.
///
/// Three full stops are recognised: ideographic `U+3002`, ASCII `.` and
/// fullwidth `U+FF0E`.
fn is_title_candidate(cleaned: &str, max_title_len: usize) -> bool {
    // Spelled as escapes so the look-alike characters cannot silently collapse
    // into ASCII '.', which would leave a duplicate, unreachable alternative.
    const IDEOGRAPHIC_FULL_STOP: char = '\u{3002}';
    const FULLWIDTH_FULL_STOP: char = '\u{FF0E}';

    if cleaned.is_empty() {
        return false;
    }
    // `nth(max)` yields a character only when more than `max` characters exist,
    // and it stops there instead of counting all of a long paragraph.
    if cleaned.chars().nth(max_title_len).is_some() {
        return false;
    }
    !matches!(
        cleaned.chars().last(),
        Some(IDEOGRAPHIC_FULL_STOP | '.' | FULLWIDTH_FULL_STOP)
    )
}
/// Scans `lines` for chapter and volume headings using the rules in `set`.
///
/// Each item of `lines` is a raw line paired with its offset in the text.
/// A line is cleaned with `clean_line`, filtered by `is_title_candidate`, and then:
/// 1. dropped if any exclusion rule matches;
/// 2. otherwise recorded as a volume heading if any volume rule matches;
/// 3. otherwise added to the hit list of every chapter rule that matches.
///
/// Only one chapter rule wins: the one whose hits score highest under
/// `effective_count`. On a tie the later rule wins, because
/// `Iterator::max_by_key` returns the last maximum.
///
/// Returns `(chapters, volumes)`. `chapters` holds the winning rule's
/// `(cleaned title, offset)` pairs in input order. Each volume's
/// `first_chapter_index` is the number of chapters with a smaller offset.
/// This assumes `lines` is supplied in ascending offset order — TODO confirm
/// against callers.
pub fn detect<I>(lines: I, set: &TocRuleSet) -> (Vec<(String, usize)>, Vec<VolumeMarker>)
where
    I: IntoIterator<Item = (String, usize)>,
{
    let rules = compile(set);
    // `is_match` is fallible; an error is treated the same as "no match".
    let hits = |rule: &CompiledRule, text: &str| rule.regex.is_match(text).unwrap_or(false);

    let chapter_rules: Vec<&CompiledRule> = rules
        .iter()
        .filter(|rule| !(rule.is_volume || rule.is_exclude))
        .collect();
    // One hit list per chapter rule, index-aligned with `chapter_rules`.
    let mut buckets: Vec<Vec<(String, usize)>> =
        chapter_rules.iter().map(|_| Vec::new()).collect();
    let mut volume_hits: Vec<(String, usize)> = Vec::new();

    for (raw, offset) in lines {
        let title = clean_line(&raw);
        if !is_title_candidate(title, set.max_title_len) {
            continue;
        }

        // Exclusions take precedence over everything else.
        let excluded = rules
            .iter()
            .any(|rule| rule.is_exclude && hits(rule, title));
        if excluded {
            continue;
        }

        // A volume heading is never also counted as a chapter.
        let is_volume = rules
            .iter()
            .any(|rule| rule.is_volume && hits(rule, title));
        if is_volume {
            volume_hits.push((title.to_string(), offset));
            continue;
        }

        for (bucket, rule) in buckets.iter_mut().zip(chapter_rules.iter().copied()) {
            if hits(rule, title) {
                bucket.push((title.to_string(), offset));
            }
        }
    }

    let chapters = buckets
        .into_iter()
        .max_by_key(|bucket| effective_count(bucket))
        .unwrap_or_default();

    let mut volumes = Vec::with_capacity(volume_hits.len());
    for (title, offset) in volume_hits {
        let first_chapter_index = chapters.partition_point(|(_, start)| *start < offset);
        volumes.push(VolumeMarker {
            title,
            first_chapter_index,
        });
    }

    (chapters, volumes)
}
/// Scores a rule's hit list by counting hits that are sufficiently spread out.
///
/// The first hit always counts. A later hit counts only if its offset is at
/// least `SCORE_MIN_GAP` past the most recently *counted* hit; a skipped hit
/// does not move that anchor. An offset smaller than the anchor is treated as a
/// gap of zero and skipped.
fn effective_count(hits: &[(String, usize)]) -> usize {
    let (total, _anchor) = hits.iter().map(|(_, offset)| *offset).fold(
        (0usize, None::<usize>),
        |(total, anchor), offset| match anchor {
            // Too close to the last counted hit: keep both total and anchor.
            Some(prev) if offset.saturating_sub(prev) < SCORE_MIN_GAP => (total, anchor),
            _ => (total + 1, Some(offset)),
        },
    );
    total
}
#[cfg(test)]
mod tests {
use super::*;
fn lines(input: &str) -> Vec<(String, usize)> {
let mut out = Vec::new();
let mut offset = 0usize;
for line in input.split_inclusive('\n') {
out.push((line.to_string(), offset));
offset += line.len();
}
out
}
#[test]
fn detects_chapter_with_zhang() {
let text = "第1章 罗峰\n正文内容。\n第2章 RR\n更多正文。\n";
let (chapters, volumes) = detect(lines(text), &TocRuleSet::builtin());
assert_eq!(chapters.len(), 2);
assert_eq!(chapters[0].0, "第1章 罗峰");
assert!(volumes.is_empty());
}
#[test]
fn detects_chapter_with_jie() {
let text = "第一节 纵身亡魔心仍不悔\n正文。\n第二节 逆光阴五百年觉悟\n正文。\n";
let (chapters, _) = detect(lines(text), &TocRuleSet::builtin());
assert_eq!(chapters.len(), 2);
assert_eq!(chapters[1].0, "第二节 逆光阴五百年觉悟");
}
#[test]
fn volume_takes_priority_and_indexes_first_chapter() {
let text = "第一卷 魔性不改\n第一节 甲\n正文。\n第二节 乙\n第二卷 魔子出山\n第三节 丙\n";
let (chapters, volumes) = detect(lines(text), &TocRuleSet::builtin());
assert_eq!(chapters.len(), 3);
assert_eq!(volumes.len(), 2);
assert_eq!(volumes[0].first_chapter_index, 0); assert_eq!(volumes[1].first_chapter_index, 2); }
#[test]
fn ignores_inline_chapter_references() {
let text = " 违反了我们商家城的城规第三章第二十五条,必须严惩。\n (详情见本卷第672章。)\n第一章 真标题\n";
let (chapters, _) = detect(lines(text), &TocRuleSet::builtin());
assert_eq!(chapters.len(), 1);
assert_eq!(chapters[0].0, "第一章 真标题");
}
#[test]
fn excludes_counter_word_ambiguity() {
let text = "第三部分 概述\n第三节课的内容。\n第一章 正章\n";
let (chapters, volumes) = detect(lines(text), &TocRuleSet::builtin());
assert!(volumes.is_empty());
assert_eq!(chapters.len(), 1);
assert_eq!(chapters[0].0, "第一章 正章");
}
#[test]
fn long_line_is_not_title() {
let long = "第一章".to_string() + &"超长内容".repeat(20) + "\n";
let (chapters, _) = detect(lines(&long), &TocRuleSet::builtin());
assert!(chapters.is_empty());
}
#[test]
fn merge_valid_user_json_adds_rule() {
let mut set = TocRuleSet::builtin();
let builtin_len = set.rules.len();
let json = r#"{ "maxTitleLen": 50, "rules": [
{ "name": "自定义", "rule": "^卷[一二三]", "isVolume": true }
] }"#;
assert!(set.merge_user_json(json));
assert_eq!(set.max_title_len, 50);
assert_eq!(set.rules.len(), builtin_len + 1);
}
#[test]
fn merge_corrupt_user_json_falls_back() {
let mut set = TocRuleSet::builtin();
let builtin_len = set.rules.len();
assert!(!set.merge_user_json("{ this is not valid json "));
assert_eq!(set.rules.len(), builtin_len);
let (chapters, _) = detect(lines("第1章 甲\n正文。\n"), &set);
assert_eq!(chapters.len(), 1);
}
#[test]
fn corrupt_user_rule_does_not_crash() {
let mut set = TocRuleSet::builtin();
set.rules.push(TocRule {
name: "bad".into(),
rule: "(unclosed".into(),
is_volume: false,
is_exclude: false,
enable: true,
example: None,
serial_number: 99,
});
let (chapters, _) = detect(lines("第1章 甲\n正文。\n"), &set);
assert_eq!(chapters.len(), 1);
}
}