1use std::fs;
2use std::ops::Range;
3use std::path::{Path, PathBuf};
4
5use clap::Args;
6use color_eyre::eyre::{self, Result};
7use fluent_templates::Loader;
8use hashbrown::HashSet;
9use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd, TextMergeWithOffset};
10
11use crate::utils::{self, CurrentDir, Lang};
12use crate::{LANG_ID, LOCALES};
13
14#[must_use]
15#[derive(Args)]
16#[command(arg_required_else_help = true,
17 about = LOCALES.lookup(&LANG_ID, "check_command"))]
18pub struct Check {
19 #[arg(help = LOCALES.lookup(&LANG_ID, "file_path"))]
20 pub file_path: PathBuf,
21
22 #[arg(long, default_value_t = false,
23 help = LOCALES.lookup(&LANG_ID, "basic_check"))]
24 pub basic_check: bool,
25
26 #[arg(long, default_value_t = false,
27 help = LOCALES.lookup(&LANG_ID, "word_count"))]
28 pub word_count: bool,
29}
30
31pub fn execute(config: Check) -> Result<()> {
32 let input_file_path;
33 let input_file_parent_path;
34
35 if utils::is_markdown_or_txt_file(&config.file_path)? {
36 input_file_path = dunce::canonicalize(&config.file_path)?;
37 input_file_parent_path = input_file_path.parent().unwrap().to_path_buf();
38 } else if let Ok(Some(path)) =
39 utils::try_get_markdown_or_txt_file_name_in_dir(&config.file_path)
40 {
41 input_file_path = path;
42 input_file_parent_path = dunce::canonicalize(&config.file_path)?;
43 } else {
44 eyre::bail!("Invalid input path: `{}`", config.file_path.display());
45 }
46 tracing::info!("Input file path: `{}`", input_file_path.display());
47
48 let current_dir = CurrentDir::new(input_file_parent_path)?;
49
50 let bytes = fs::read(&input_file_path)?;
51 let markdown = simdutf8::basic::from_utf8(&bytes)?;
52 let mut parser = TextMergeWithOffset::new(
53 Parser::new_ext(markdown, Options::ENABLE_YAML_STYLE_METADATA_BLOCKS).into_offset_iter(),
54 );
55
56 let lang = check_metadata(&mut parser)?;
57
58 let max_width = (utils::terminal_size().0 / 2) as usize;
59 let mut char_set = HashSet::new();
60 let mut in_paragraph = false;
61 let mut word_count = 0;
62 parser.for_each(|(event, range)| match event {
63 Event::Start(tag) => match tag {
64 Tag::Heading { level, .. } => {
65 let title = markdown[range].trim_start_matches('#').trim();
66
67 if level == HeadingLevel::H1 {
68 if !check_volume_title(title, lang) {
69 println_msg(format!("Irregular volume title format: `{title}`"));
70 }
71 } else if level == HeadingLevel::H2 {
72 if !check_chapter_title(title, lang) {
73 println_msg(format!("Irregular chapter title format: `{title}`"));
74 }
75 } else {
76 println_msg(format!(
77 "Irregular heading level: `{level:?}`, content: `{title}`"
78 ));
79 }
80 }
81 Tag::Image { dest_url, .. } => {
82 let image_path = Path::new(dest_url.as_ref());
83
84 if !image_path.is_file() {
85 println_msg(format!("Image `{}` does not exist", image_path.display()));
86 }
87 }
88 Tag::Paragraph => {
89 in_paragraph = true;
90 }
91 Tag::BlockQuote(_)
92 | Tag::CodeBlock(_)
93 | Tag::List(_)
94 | Tag::Item
95 | Tag::FootnoteDefinition(_)
96 | Tag::Table(_)
97 | Tag::TableHead
98 | Tag::TableRow
99 | Tag::TableCell
100 | Tag::Emphasis
101 | Tag::Strong
102 | Tag::Strikethrough
103 | Tag::Link { .. }
104 | Tag::HtmlBlock
105 | Tag::MetadataBlock(_)
106 | Tag::DefinitionList
107 | Tag::DefinitionListTitle
108 | Tag::DefinitionListDefinition
109 | Tag::Superscript
110 | Tag::Subscript => {
111 if !config.basic_check {
112 let content = console::truncate_str(markdown[range].trim(), max_width, "...");
113
114 println_msg(format!(
115 "Markdown tag that should not appear: `{tag:?}`, content: `{content}`"
116 ));
117 }
118 }
119 },
120 Event::Text(text) => {
121 if !config.basic_check {
122 for c in text.chars() {
123 if !unicode_blocks::is_cjk(c)
124 && !utils::is_punctuation(c)
125 && !c.is_ascii_alphanumeric()
126 && c != ' '
127 {
128 if char_set.contains(&c) {
129 continue;
130 } else {
131 char_set.insert(c);
132
133 println_msg(format!(
134 "Irregular char: `{}`, at `{}`",
135 c,
136 console::truncate_str(
137 markdown[range.clone()].trim(),
138 max_width,
139 "..."
140 )
141 ));
142 }
143 }
144 }
145 }
146
147 if config.word_count {
148 for c in text.chars() {
149 if unicode_blocks::is_cjk(c) {
150 word_count += 1;
151 }
152 }
153 }
154 }
155 Event::End(tag) => {
156 if let TagEnd::Paragraph = tag {
157 in_paragraph = false;
158 }
159 }
160 Event::HardBreak
161 | Event::Code(_)
162 | Event::Html(_)
163 | Event::FootnoteReference(_)
164 | Event::SoftBreak
165 | Event::Rule
166 | Event::TaskListMarker(_)
167 | Event::InlineHtml(_)
168 | Event::InlineMath(_)
169 | Event::DisplayMath(_) => {
170 if !config.basic_check {
171 let content = console::truncate_str(markdown[range].trim(), max_width, "...");
172
173 println_msg(format!(
174 "Markdown event that should not appear: `{event:?}`, content: `{content}`"
175 ));
176 }
177 }
178 });
179
180 if config.word_count {
181 println!("Total number of words: {word_count}");
182 }
183
184 current_dir.restore()?;
185
186 Ok(())
187}
188
189fn check_metadata<'a, T>(parser: &mut TextMergeWithOffset<'a, T>) -> Result<Lang>
190where
191 T: Iterator<Item = (Event<'a>, Range<usize>)>,
192{
193 let metadata = utils::get_metadata(parser)?;
194
195 eyre::ensure!(
196 metadata.cover_image_is_ok(),
197 "Cover image does not exist: `{}`",
198 metadata.cover_image.unwrap().display()
199 );
200
201 Ok(metadata.lang)
202}
203
204fn println_msg(msg: String) {
205 println!("{} {}", utils::emoji("⚠️"), msg);
206}
207
/// Expands to a reference to a `regex::Regex` built from a pattern literal.
///
/// Each expansion owns its own `OnceLock`, so the pattern is compiled the first
/// time that call site runs and reused afterwards. Panics on first use if the
/// pattern is not a valid regular expression.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        static COMPILED: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
        COMPILED.get_or_init(|| regex::Regex::new($pattern).unwrap())
    }};
}
214
215#[must_use]
216fn check_chapter_title<T>(title: T, lang: Lang) -> bool
217where
218 T: AsRef<str>,
219{
220 let title = title.as_ref();
221
222 match lang {
223 Lang::ZhHant => {
224 let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章話] .+");
225 regex.is_match(title.as_ref())
226 }
227 Lang::ZhHans => {
228 let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章话] .+");
229 regex.is_match(title.as_ref())
230 }
231 }
232}
233
234#[must_use]
235fn check_volume_title<T>(title: T, lang: Lang) -> bool
236where
237 T: AsRef<str>,
238{
239 let title = title.as_ref();
240
241 match lang {
242 Lang::ZhHant => {
243 let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
244 regex.is_match(title) || title == "簡介"
245 }
246 Lang::ZhHans => {
247 let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
248 regex.is_match(title) || title == "简介"
249 }
250 }
251}
252
#[cfg(test)]
mod tests {
    use super::*;

    // Simplified Chinese chapter titles.
    #[test]
    fn check_chapter_title_test() {
        assert!(check_chapter_title("第一章 被俘虏的开始", Lang::ZhHans));
        assert!(check_chapter_title(
            "第一百三十二章 标标标标标标标标标",
            Lang::ZhHans
        ));
        assert!(check_chapter_title("第123章 标题标标标标", Lang::ZhHans));
        // A trailing space with no title text is not enough.
        assert!(!check_chapter_title("第一章 ", Lang::ZhHans));
        assert!(!check_chapter_title("第1二3话", Lang::ZhHans));
        // The space between the unit and the title text is mandatory.
        assert!(!check_chapter_title("第123话标题", Lang::ZhHans));
        // The leading `第` is mandatory.
        assert!(!check_chapter_title("123话 标题", Lang::ZhHans));
    }

    // Traditional Chinese chapter titles use `話` instead of `话`.
    #[test]
    fn check_chapter_title_zh_hant_test() {
        assert!(check_chapter_title("第一章 被俘虜的開始", Lang::ZhHant));
        assert!(check_chapter_title("第十二話 標題", Lang::ZhHant));
        assert!(!check_chapter_title("第十二话 標題", Lang::ZhHant));
        assert!(!check_chapter_title("第十二話 ", Lang::ZhHant));
    }

    // Simplified Chinese volume titles, including the introduction heading.
    #[test]
    fn check_volume_title_test() {
        assert!(check_volume_title(
            "第三十二卷 标标标标标标标标标",
            Lang::ZhHans
        ));
        assert!(check_volume_title("第1卷 标题", Lang::ZhHans));
        assert!(check_volume_title("简介", Lang::ZhHans));
        assert!(!check_volume_title("第123话 标题标标标标", Lang::ZhHans));
        assert!(!check_volume_title("第1卷 ", Lang::ZhHans));
        // The Traditional introduction heading is not accepted for `ZhHans`.
        assert!(!check_volume_title("簡介", Lang::ZhHans));
    }

    // Traditional Chinese volume titles, including the introduction heading.
    #[test]
    fn check_volume_title_zh_hant_test() {
        assert!(check_volume_title("第三卷 標題", Lang::ZhHant));
        assert!(check_volume_title("簡介", Lang::ZhHant));
        // The Simplified introduction heading is not accepted for `ZhHant`.
        assert!(!check_volume_title("简介", Lang::ZhHant));
        assert!(!check_volume_title("第三卷 ", Lang::ZhHant));
    }
}