novel_cli/cmd/
check.rs

1use std::fs;
2use std::ops::Range;
3use std::path::{Path, PathBuf};
4
5use clap::Args;
6use color_eyre::eyre::{self, Result};
7use fluent_templates::Loader;
8use hashbrown::HashSet;
9use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd, TextMergeWithOffset};
10
11use crate::utils::{self, CurrentDir, Lang};
12use crate::{LANG_ID, LOCALES};
13
14#[must_use]
15#[derive(Args)]
16#[command(arg_required_else_help = true,
17    about = LOCALES.lookup(&LANG_ID, "check_command"))]
18pub struct Check {
19    #[arg(help = LOCALES.lookup(&LANG_ID, "file_path"))]
20    pub file_path: PathBuf,
21
22    #[arg(long, default_value_t = false,
23        help = LOCALES.lookup(&LANG_ID, "basic_check"))]
24    pub basic_check: bool,
25
26    #[arg(long, default_value_t = false,
27        help = LOCALES.lookup(&LANG_ID, "word_count"))]
28    pub word_count: bool,
29}
30
31pub fn execute(config: Check) -> Result<()> {
32    let input_file_path;
33    let input_file_parent_path;
34
35    if utils::is_markdown_or_txt_file(&config.file_path)? {
36        input_file_path = dunce::canonicalize(&config.file_path)?;
37        input_file_parent_path = input_file_path.parent().unwrap().to_path_buf();
38    } else if let Ok(Some(path)) =
39        utils::try_get_markdown_or_txt_file_name_in_dir(&config.file_path)
40    {
41        input_file_path = path;
42        input_file_parent_path = dunce::canonicalize(&config.file_path)?;
43    } else {
44        eyre::bail!("Invalid input path: `{}`", config.file_path.display());
45    }
46    tracing::info!("Input file path: `{}`", input_file_path.display());
47
48    let current_dir = CurrentDir::new(input_file_parent_path)?;
49
50    let bytes = fs::read(&input_file_path)?;
51    let markdown = simdutf8::basic::from_utf8(&bytes)?;
52    let mut parser = TextMergeWithOffset::new(
53        Parser::new_ext(markdown, Options::ENABLE_YAML_STYLE_METADATA_BLOCKS).into_offset_iter(),
54    );
55
56    let lang = check_metadata(&mut parser)?;
57
58    let max_width = (utils::terminal_size().0 / 2) as usize;
59    let mut char_set = HashSet::new();
60    let mut in_paragraph = false;
61    let mut word_count = 0;
62    parser.for_each(|(event, range)| match event {
63        Event::Start(tag) => match tag {
64            Tag::Heading { level, .. } => {
65                let title = markdown[range].trim_start_matches('#').trim();
66
67                if level == HeadingLevel::H1 {
68                    if !check_volume_title(title, lang) {
69                        println_msg(format!("Irregular volume title format: `{title}`"));
70                    }
71                } else if level == HeadingLevel::H2 {
72                    if !check_chapter_title(title, lang) {
73                        println_msg(format!("Irregular chapter title format: `{title}`"));
74                    }
75                } else {
76                    println_msg(format!(
77                        "Irregular heading level: `{level:?}`, content: `{title}`"
78                    ));
79                }
80            }
81            Tag::Image { dest_url, .. } => {
82                let image_path = Path::new(dest_url.as_ref());
83
84                if !image_path.is_file() {
85                    println_msg(format!("Image `{}` does not exist", image_path.display()));
86                }
87            }
88            Tag::Paragraph => {
89                in_paragraph = true;
90            }
91            Tag::BlockQuote(_)
92            | Tag::CodeBlock(_)
93            | Tag::List(_)
94            | Tag::Item
95            | Tag::FootnoteDefinition(_)
96            | Tag::Table(_)
97            | Tag::TableHead
98            | Tag::TableRow
99            | Tag::TableCell
100            | Tag::Emphasis
101            | Tag::Strong
102            | Tag::Strikethrough
103            | Tag::Link { .. }
104            | Tag::HtmlBlock
105            | Tag::MetadataBlock(_)
106            | Tag::DefinitionList
107            | Tag::DefinitionListTitle
108            | Tag::DefinitionListDefinition
109            | Tag::Superscript
110            | Tag::Subscript => {
111                if !config.basic_check {
112                    let content = console::truncate_str(markdown[range].trim(), max_width, "...");
113
114                    println_msg(format!(
115                        "Markdown tag that should not appear: `{tag:?}`, content: `{content}`"
116                    ));
117                }
118            }
119        },
120        Event::Text(text) => {
121            if !config.basic_check {
122                for c in text.chars() {
123                    if !unicode_blocks::is_cjk(c)
124                        && !utils::is_punctuation(c)
125                        && !c.is_ascii_alphanumeric()
126                        && c != ' '
127                    {
128                        if char_set.contains(&c) {
129                            continue;
130                        } else {
131                            char_set.insert(c);
132
133                            println_msg(format!(
134                                "Irregular char: `{}`, at `{}`",
135                                c,
136                                console::truncate_str(
137                                    markdown[range.clone()].trim(),
138                                    max_width,
139                                    "..."
140                                )
141                            ));
142                        }
143                    }
144                }
145            }
146
147            if config.word_count {
148                for c in text.chars() {
149                    if unicode_blocks::is_cjk(c) {
150                        word_count += 1;
151                    }
152                }
153            }
154        }
155        Event::End(tag) => {
156            if let TagEnd::Paragraph = tag {
157                in_paragraph = false;
158            }
159        }
160        Event::HardBreak
161        | Event::Code(_)
162        | Event::Html(_)
163        | Event::FootnoteReference(_)
164        | Event::SoftBreak
165        | Event::Rule
166        | Event::TaskListMarker(_)
167        | Event::InlineHtml(_)
168        | Event::InlineMath(_)
169        | Event::DisplayMath(_) => {
170            if !config.basic_check {
171                let content = console::truncate_str(markdown[range].trim(), max_width, "...");
172
173                println_msg(format!(
174                    "Markdown event that should not appear: `{event:?}`, content: `{content}`"
175                ));
176            }
177        }
178    });
179
180    if config.word_count {
181        println!("Total number of words: {word_count}");
182    }
183
184    current_dir.restore()?;
185
186    Ok(())
187}
188
189fn check_metadata<'a, T>(parser: &mut TextMergeWithOffset<'a, T>) -> Result<Lang>
190where
191    T: Iterator<Item = (Event<'a>, Range<usize>)>,
192{
193    let metadata = utils::get_metadata(parser)?;
194
195    eyre::ensure!(
196        metadata.cover_image_is_ok(),
197        "Cover image does not exist: `{}`",
198        metadata.cover_image.unwrap().display()
199    );
200
201    Ok(metadata.lang)
202}
203
204fn println_msg(msg: String) {
205    println!("{} {}", utils::emoji("⚠️"), msg);
206}
207
// Expands to a `&'static regex::Regex` for the given pattern literal.
//
// Each expansion site owns a private `OnceLock`, so the pattern is compiled
// the first time that site runs and reused afterwards. An invalid pattern
// panics on that first use.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        use std::sync::OnceLock;

        static COMPILED: OnceLock<regex::Regex> = OnceLock::new();
        COMPILED.get_or_init(|| regex::Regex::new($pattern).unwrap())
    }};
}
214
215#[must_use]
216fn check_chapter_title<T>(title: T, lang: Lang) -> bool
217where
218    T: AsRef<str>,
219{
220    let title = title.as_ref();
221
222    match lang {
223        Lang::ZhHant => {
224            let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章話] .+");
225            regex.is_match(title.as_ref())
226        }
227        Lang::ZhHans => {
228            let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章话] .+");
229            regex.is_match(title.as_ref())
230        }
231    }
232}
233
234#[must_use]
235fn check_volume_title<T>(title: T, lang: Lang) -> bool
236where
237    T: AsRef<str>,
238{
239    let title = title.as_ref();
240
241    match lang {
242        Lang::ZhHant => {
243            let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
244            regex.is_match(title) || title == "簡介"
245        }
246        Lang::ZhHans => {
247            let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
248            regex.is_match(title) || title == "简介"
249        }
250    }
251}
252
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn check_chapter_title_test() {
        // Simplified Chinese: well-formed titles.
        assert!(check_chapter_title("第一章 被俘虏的开始", Lang::ZhHans));
        assert!(check_chapter_title(
            "第一百三十二章 标标标标标标标标标",
            Lang::ZhHans
        ));
        assert!(check_chapter_title("第123章 标题标标标标", Lang::ZhHans));

        // Simplified Chinese: missing title text, missing space, missing `第`.
        assert!(!check_chapter_title("第一章 ", Lang::ZhHans));
        assert!(!check_chapter_title("第1二3话", Lang::ZhHans));
        assert!(!check_chapter_title("第123话标题", Lang::ZhHans));
        assert!(!check_chapter_title("123话 标题", Lang::ZhHans));

        // Traditional Chinese: well-formed titles.
        assert!(check_chapter_title("第一章 被俘虜的開始", Lang::ZhHant));
        assert!(check_chapter_title("第12話 標題", Lang::ZhHant));

        // The chapter marker must belong to the selected script.
        assert!(!check_chapter_title("第12话 標題", Lang::ZhHant));
        assert!(!check_chapter_title("第12話 标题", Lang::ZhHans));
    }

    #[test]
    fn check_volume_title_test() {
        // Simplified Chinese.
        assert!(check_volume_title(
            "第三十二卷 标标标标标标标标标",
            Lang::ZhHans
        ));
        assert!(!check_volume_title("第123话 标题标标标标", Lang::ZhHans));
        assert!(!check_volume_title("第1卷 ", Lang::ZhHans));

        // Traditional Chinese.
        assert!(check_volume_title("第1卷 標題", Lang::ZhHant));

        // The introduction heading is accepted only in the matching script.
        assert!(check_volume_title("简介", Lang::ZhHans));
        assert!(check_volume_title("簡介", Lang::ZhHant));
        assert!(!check_volume_title("簡介", Lang::ZhHans));
        assert!(!check_volume_title("简介", Lang::ZhHant));
    }
}