1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
use std::{fs, path::PathBuf};

use ahash::AHashSet;
use anyhow::{ensure, Result};
use clap::Args;
use console::{Alignment, Emoji};
use fluent_templates::Loader;
use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag};
use rayon::prelude::*;

use crate::{utils, LANG_ID, LOCALES};

// Arguments of the check command, parsed by clap through the `Args` derive.
//
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would compete with the localized strings below.
//
// `arg_required_else_help = true` makes clap print the help text when the
// command is invoked without arguments. The `about` text is looked up in
// `LOCALES` for `LANG_ID` under the key `check_command`; the `expect` panics
// if that key is missing.
#[derive(Debug, Args)]
#[command(arg_required_else_help = true,
    about = LOCALES.lookup(&LANG_ID, "check_command").expect("`check_command` does not exists"))]
pub struct Check {
    // Path of the file to check. Its help text is looked up in `LOCALES` under
    // the key `markdown_path`; the `expect` panics if that key is missing.
    #[arg(help = LOCALES.lookup(&LANG_ID, "markdown_path").expect("`markdown_path` does not exists"))]
    pub markdown_path: PathBuf,
}

/// Checks the markdown file at `config.markdown_path` and prints a warning for
/// every irregularity found.
///
/// The file is read, validated as UTF-8 and parsed with every pulldown-cmark
/// option enabled except smart punctuation. The parsed events are then
/// processed in parallel with rayon, so the order of the printed warnings is
/// not deterministic:
///
/// * level-1 headings are validated with `check_volume_title`;
/// * level-2 headings are validated with `check_chapter_title`;
/// * text events are scanned for characters rejected by `utils::is_cjk`,
///   `utils::is_punctuation` and `char::is_ascii_alphanumeric` that are not a
///   plain space. Each such character is reported exactly once, together with
///   the source text of the event in which it was first recorded.
///
/// # Errors
///
/// Returns an error if `utils::is_markdown` rejects the path, if the file
/// cannot be read, or if its content is not valid UTF-8.
pub fn execute(config: Check) -> Result<()> {
    ensure!(
        utils::is_markdown(&config.markdown_path),
        "The input file is not in markdown format"
    );

    let bytes = fs::read(&config.markdown_path)?;
    let markdown = simdutf8::basic::from_utf8(&bytes)?;

    let mut options = Options::all();
    options.remove(Options::ENABLE_SMART_PUNCTUATION);
    let parser = Parser::new_ext(markdown, options);

    // Collect first so the events (with their byte ranges) can be split across threads.
    let events = parser.into_offset_iter().collect::<Vec<(_, _)>>();
    // Characters that have already been reported, shared between worker threads.
    let reported = parking_lot::RwLock::new(AHashSet::new());

    events.into_par_iter().for_each(|(event, range)| match &event {
        Event::Start(Tag::Heading(heading_level, _, _)) => {
            // The event range covers the heading source text; strip the leading
            // `#` markers and surrounding whitespace to get the title.
            let title = markdown[range].trim_start_matches('#').trim();

            match heading_level {
                HeadingLevel::H1 if !check_volume_title(title) => {
                    println!("{} Irregular volume title format: {}", emoji(), title);
                }
                HeadingLevel::H2 if !check_chapter_title(title) => {
                    println!("{} Irregular chapter title format: {}", emoji(), title);
                }
                _ => (),
            }
        }
        Event::Text(text) => {
            for c in text.chars() {
                let is_regular = utils::is_cjk(c)
                    || utils::is_punctuation(c)
                    || c.is_ascii_alphanumeric()
                    || c == ' ';
                if is_regular {
                    continue;
                }

                // Fast path: a shared read lock is enough for characters that
                // have already been reported.
                if reported.read().contains(&c) {
                    continue;
                }

                // Another thread may have recorded `c` between the check above
                // and this write lock. `insert` returns `true` only for the
                // thread that actually added it, so each character is printed
                // exactly once. The guard is released before printing.
                if reported.write().insert(c) {
                    println!(
                        "{} Irregular char: {}, at {}",
                        emoji(),
                        c,
                        markdown[range.clone()].trim()
                    );
                }
            }
        }
        _ => (),
    });

    Ok(())
}

/// Builds the marker that prefixes every warning line.
///
/// The marker is `console::Emoji("⚠️", ">")` rendered to a string and then
/// left-aligned and padded to a display width of 2 with `console::pad_str`
/// (no truncation suffix).
fn emoji() -> String {
    let symbol = Emoji("⚠️", ">").to_string();
    console::pad_str(&symbol, 2, Alignment::Left, None).into_owned()
}

// Expands to a `&'static regex::Regex` for the given pattern literal.
//
// Each expansion owns its own `static` cell, so the pattern is compiled the
// first time that call site runs and reused afterwards. An invalid pattern
// panics on first use because of the `unwrap`.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        use once_cell::sync::OnceCell;

        static COMPILED: OnceCell<regex::Regex> = OnceCell::new();
        COMPILED.get_or_init(|| regex::Regex::new($pattern).unwrap())
    }};
}

/// Returns `true` when `title` follows the expected chapter-title format.
///
/// A regular chapter title starts with `第`, followed by 1 to 7 numeral
/// characters (Chinese numerals `零一二三四五六七八九十百千` or ASCII digits),
/// then `章` or `话`, a single space, and at least one more character,
/// e.g. `第一章 被俘虏的开始` or `第123章 标题`.
fn check_chapter_title<T>(title: T) -> bool
where
    T: AsRef<str>,
{
    // `is_match` searches anywhere in the haystack, so the pattern is anchored
    // with `^`; otherwise a title with arbitrary text before `第…章` would be
    // accepted as regular.
    let regex = regex!(r"^第([零一二三四五六七八九十百千]|[0-9]){1,7}[章话] .+");
    regex.is_match(title.as_ref())
}

/// Returns `true` when `title` follows the expected volume-title format.
///
/// A regular volume title starts with `第`, followed by 1 to 3 numeral
/// characters (Chinese numerals `一二三四五六七八九十` or ASCII digits), then
/// `卷`, a single space, and at least one more character,
/// e.g. `第三十二卷 标题`.
fn check_volume_title<T>(title: T) -> bool
where
    T: AsRef<str>,
{
    // `is_match` searches anywhere in the haystack, so the pattern is anchored
    // with `^`; otherwise a title with arbitrary text before `第…卷` would be
    // accepted as regular.
    let regex = regex!(r"^第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
    regex.is_match(title.as_ref())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Chapter titles: well-formed samples are accepted, malformed ones rejected.
    #[test]
    fn check_chapter_title_test() {
        let accepted = [
            "第一章 被俘虏的开始",
            "第一百三十二章 标标标标标标标标标",
            "第123章 标题标标标标",
        ];
        for title in accepted {
            assert!(check_chapter_title(title), "should be accepted: {}", title);
        }

        let rejected = ["第一章 ", "第1二3话", "第123话标题", "123话 标题"];
        for title in rejected {
            assert!(!check_chapter_title(title), "should be rejected: {}", title);
        }
    }

    /// Volume titles: well-formed samples are accepted, malformed ones rejected.
    #[test]
    fn check_volume_title_test() {
        let accepted = ["第三十二卷 标标标标标标标标标"];
        for title in accepted {
            assert!(check_volume_title(title), "should be accepted: {}", title);
        }

        let rejected = ["第123话 标题标标标标", "第1卷 "];
        for title in rejected {
            assert!(!check_volume_title(title), "should be rejected: {}", title);
        }
    }
}