1use std::{
2 fs,
3 ops::Range,
4 path::{Path, PathBuf},
5};
6
7use clap::Args;
8use color_eyre::eyre::{self, Result};
9use fluent_templates::Loader;
10use hashbrown::HashSet;
11use novel_api::Timing;
12use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd, TextMergeWithOffset};
13
14use crate::{
15 LANG_ID, LOCALES,
16 utils::{self, CurrentDir, Lang},
17};
18
// Command-line arguments of the `check` subcommand.
//
// NOTE: regular `//` comments are used on purpose. `///` doc comments on a
// clap-derived type feed into the generated help text, which is already
// supplied through the localized `about` / `help` lookups below.
#[must_use]
#[derive(Args)]
#[command(arg_required_else_help = true,
    about = LOCALES.lookup(&LANG_ID, "check_command"))]
pub struct Check {
    // Path handed to `execute`: either a markdown/txt file, or a directory
    // in which such a file is looked up.
    #[arg(help = LOCALES.lookup(&LANG_ID, "file_path"))]
    pub file_path: PathBuf,

    // When set, `execute` skips the "should not appear" tag/event reports and
    // the irregular-character scan; heading and image checks still run.
    #[arg(long, default_value_t = false,
        help = LOCALES.lookup(&LANG_ID, "basic_check"))]
    pub basic_check: bool,

    // When set, `execute` counts the CJK characters found in text events and
    // prints the total at the end.
    #[arg(long, default_value_t = false,
        help = LOCALES.lookup(&LANG_ID, "word_count"))]
    pub word_count: bool,
}
35
36pub fn execute(config: Check) -> Result<()> {
37 let mut timing = Timing::new();
38
39 let input_file_path;
40 let input_file_parent_path;
41
42 if utils::is_markdown_or_txt_file(&config.file_path)? {
43 input_file_path = dunce::canonicalize(&config.file_path)?;
44 input_file_parent_path = input_file_path.parent().unwrap().to_path_buf();
45 } else if let Ok(Some(path)) =
46 utils::try_get_markdown_or_txt_file_name_in_dir(&config.file_path)
47 {
48 input_file_path = path;
49 input_file_parent_path = dunce::canonicalize(&config.file_path)?;
50 } else {
51 eyre::bail!("Invalid input path: `{}`", config.file_path.display());
52 }
53 tracing::info!("Input file path: `{}`", input_file_path.display());
54
55 let current_dir = CurrentDir::new(input_file_parent_path)?;
56
57 let bytes = fs::read(&input_file_path)?;
58 let markdown = simdutf8::basic::from_utf8(&bytes)?;
59 let mut parser = TextMergeWithOffset::new(
60 Parser::new_ext(markdown, Options::ENABLE_YAML_STYLE_METADATA_BLOCKS).into_offset_iter(),
61 );
62
63 let lang = check_metadata(&mut parser)?;
64
65 let max_width = (utils::terminal_size().0 / 2) as usize;
66 let mut char_set = HashSet::new();
67 let mut in_paragraph = false;
68 let mut word_count = 0;
69 parser.for_each(|(event, range)| match event {
70 Event::Start(tag) => match tag {
71 Tag::Heading { level, .. } => {
72 let title = markdown[range].trim_start_matches('#').trim();
73
74 if level == HeadingLevel::H1 {
75 if !check_volume_title(title, lang) {
76 println_msg(format!("Irregular volume title format: `{title}`"));
77 }
78 } else if level == HeadingLevel::H2 {
79 if !check_chapter_title(title, lang) {
80 println_msg(format!("Irregular chapter title format: `{title}`"));
81 }
82 } else {
83 println_msg(format!(
84 "Irregular heading level: `{level:?}`, content: `{title}`"
85 ));
86 }
87 }
88 Tag::Image { dest_url, .. } => {
89 let image_path = Path::new(dest_url.as_ref());
90
91 if !image_path.is_file() {
92 println_msg(format!("Image `{}` does not exist", image_path.display()));
93 }
94 }
95 Tag::Paragraph => {
96 in_paragraph = true;
97 }
98 Tag::BlockQuote(_)
99 | Tag::CodeBlock(_)
100 | Tag::List(_)
101 | Tag::Item
102 | Tag::FootnoteDefinition(_)
103 | Tag::Table(_)
104 | Tag::TableHead
105 | Tag::TableRow
106 | Tag::TableCell
107 | Tag::Emphasis
108 | Tag::Strong
109 | Tag::Strikethrough
110 | Tag::Link { .. }
111 | Tag::HtmlBlock
112 | Tag::MetadataBlock(_)
113 | Tag::DefinitionList
114 | Tag::DefinitionListTitle
115 | Tag::DefinitionListDefinition
116 | Tag::Superscript
117 | Tag::Subscript => {
118 if !config.basic_check {
119 let content = console::truncate_str(markdown[range].trim(), max_width, "...");
120
121 println_msg(format!(
122 "Markdown tag that should not appear: `{tag:?}`, content: `{content}`"
123 ));
124 }
125 }
126 },
127 Event::Text(text) => {
128 if !config.basic_check {
129 for c in text.chars() {
130 if !utils::is_cjk(c)
131 && !utils::is_punctuation(c)
132 && !c.is_ascii_alphanumeric()
133 && c != ' '
134 {
135 if char_set.contains(&c) {
136 continue;
137 } else {
138 char_set.insert(c);
139
140 println_msg(format!(
141 "Irregular char: `{}`, at `{}`",
142 c,
143 console::truncate_str(
144 markdown[range.clone()].trim(),
145 max_width,
146 "..."
147 )
148 ));
149 }
150 }
151 }
152 }
153
154 if config.word_count {
155 for c in text.chars() {
156 if utils::is_cjk(c) {
157 word_count += 1;
158 }
159 }
160 }
161 }
162 Event::End(tag) => {
163 if let TagEnd::Paragraph = tag {
164 in_paragraph = false;
165 }
166 }
167 Event::HardBreak
168 | Event::Code(_)
169 | Event::Html(_)
170 | Event::FootnoteReference(_)
171 | Event::SoftBreak
172 | Event::Rule
173 | Event::TaskListMarker(_)
174 | Event::InlineHtml(_)
175 | Event::InlineMath(_)
176 | Event::DisplayMath(_) => {
177 if !config.basic_check {
178 let content = console::truncate_str(markdown[range].trim(), max_width, "...");
179
180 println_msg(format!(
181 "Markdown event that should not appear: `{event:?}`, content: `{content}`"
182 ));
183 }
184 }
185 });
186
187 if config.word_count {
188 println!("Total number of words: {word_count}");
189 }
190
191 current_dir.restore()?;
192
193 tracing::debug!("Time spent on `check`: {}", timing.elapsed()?);
194
195 Ok(())
196}
197
198fn check_metadata<'a, T>(parser: &mut TextMergeWithOffset<'a, T>) -> Result<Lang>
199where
200 T: Iterator<Item = (Event<'a>, Range<usize>)>,
201{
202 let metadata = utils::get_metadata(parser)?;
203
204 eyre::ensure!(
205 metadata.cover_image_is_ok(),
206 "Cover image does not exist: `{}`",
207 metadata.cover_image.unwrap().display()
208 );
209
210 Ok(metadata.lang)
211}
212
213fn println_msg(msg: String) {
214 println!("{} {}", utils::emoji("⚠️"), msg);
215}
216
/// Expands to a `&'static regex::Regex` for the given pattern literal.
///
/// Every expansion owns its own `OnceLock`, so the pattern is compiled the
/// first time that particular call site is reached and reused afterwards.
///
/// # Panics
///
/// Panics on first use if the literal is not a valid regular expression.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        static COMPILED: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
        COMPILED.get_or_init(|| regex::Regex::new($pattern).unwrap())
    }};
}
223
224#[must_use]
225fn check_chapter_title<T>(title: T, lang: Lang) -> bool
226where
227 T: AsRef<str>,
228{
229 let title = title.as_ref();
230
231 match lang {
232 Lang::ZhHant => {
233 let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章話] .+");
234 regex.is_match(title.as_ref())
235 }
236 Lang::ZhHans => {
237 let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章话] .+");
238 regex.is_match(title.as_ref())
239 }
240 }
241}
242
243#[must_use]
244fn check_volume_title<T>(title: T, lang: Lang) -> bool
245where
246 T: AsRef<str>,
247{
248 let title = title.as_ref();
249
250 match lang {
251 Lang::ZhHant => {
252 let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
253 regex.is_match(title) || title == "簡介"
254 }
255 Lang::ZhHans => {
256 let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
257 regex.is_match(title) || title == "简介"
258 }
259 }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    // Simplified-Chinese chapter titles: accepted and rejected forms.
    #[test]
    fn check_chapter_title_test() {
        assert!(check_chapter_title("第一章 被俘虏的开始", Lang::ZhHans));
        assert!(check_chapter_title(
            "第一百三十二章 标标标标标标标标标",
            Lang::ZhHans
        ));
        assert!(check_chapter_title("第123章 标题标标标标", Lang::ZhHans));
        // The name after the space must not be empty.
        assert!(!check_chapter_title("第一章 ", Lang::ZhHans));
        assert!(!check_chapter_title("第1二3话", Lang::ZhHans));
        // The space between the marker and the name is mandatory.
        assert!(!check_chapter_title("第123话标题", Lang::ZhHans));
        // The leading `第` is mandatory.
        assert!(!check_chapter_title("123话 标题", Lang::ZhHans));
    }

    // Traditional-Chinese chapter titles use `話` where simplified uses `话`.
    #[test]
    fn check_chapter_title_zh_hant_test() {
        assert!(check_chapter_title("第一章 被俘虜的開始", Lang::ZhHant));
        assert!(check_chapter_title("第12話 標題", Lang::ZhHant));
        assert!(!check_chapter_title("第12话 标题", Lang::ZhHant));
        assert!(!check_chapter_title("第12話 標題", Lang::ZhHans));
    }

    // Simplified-Chinese volume titles: accepted and rejected forms.
    #[test]
    fn check_volume_title_test() {
        assert!(check_volume_title(
            "第三十二卷 标标标标标标标标标",
            Lang::ZhHans
        ));
        assert!(!check_volume_title("第123话 标题标标标标", Lang::ZhHans));
        assert!(!check_volume_title("第1卷 ", Lang::ZhHans));
    }

    // The introduction title is accepted verbatim, but only in the spelling
    // that belongs to the given language.
    #[test]
    fn check_volume_title_intro_test() {
        assert!(check_volume_title("简介", Lang::ZhHans));
        assert!(check_volume_title("簡介", Lang::ZhHant));
        assert!(!check_volume_title("簡介", Lang::ZhHans));
        assert!(!check_volume_title("简介", Lang::ZhHant));
        assert!(check_volume_title("第1卷 標題", Lang::ZhHant));
    }
}