novel_cli/cmd/
check.rs

1use std::fs;
2use std::ops::Range;
3use std::path::{Path, PathBuf};
4
5use clap::Args;
6use color_eyre::eyre::{self, Result};
7use fluent_templates::Loader;
8use hashbrown::HashSet;
9use novel_api::Timing;
10use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd, TextMergeWithOffset};
11
12use crate::utils::{self, CurrentDir, Lang};
13use crate::{LANG_ID, LOCALES};
14
// Command-line arguments of the `check` subcommand; consumed by `execute`.
//
// NOTE(review): plain `//` comments are used here on purpose. clap's derive macro is understood
// to turn `///` doc comments into help text, which could compete with the localized
// `about`/`help` strings looked up from `LOCALES` — confirm before converting these to `///`.
#[must_use]
#[derive(Args)]
#[command(arg_required_else_help = true,
    about = LOCALES.lookup(&LANG_ID, "check_command"))]
pub struct Check {
    // Path given by the user. `execute` accepts either a Markdown/txt file or a directory in
    // which such a file is looked up.
    #[arg(help = LOCALES.lookup(&LANG_ID, "file_path"))]
    pub file_path: PathBuf,

    // When set, `execute` skips the reports about unexpected Markdown tags/events and
    // irregular characters; heading and image checks still run.
    #[arg(long, default_value_t = false,
        help = LOCALES.lookup(&LANG_ID, "basic_check"))]
    pub basic_check: bool,

    // When set, `execute` counts the characters accepted by `utils::is_cjk` and prints the total.
    #[arg(long, default_value_t = false,
        help = LOCALES.lookup(&LANG_ID, "word_count"))]
    pub word_count: bool,
}
31
32pub fn execute(config: Check) -> Result<()> {
33    let mut timing = Timing::new();
34
35    let input_file_path;
36    let input_file_parent_path;
37
38    if utils::is_markdown_or_txt_file(&config.file_path)? {
39        input_file_path = dunce::canonicalize(&config.file_path)?;
40        input_file_parent_path = input_file_path.parent().unwrap().to_path_buf();
41    } else if let Ok(Some(path)) =
42        utils::try_get_markdown_or_txt_file_name_in_dir(&config.file_path)
43    {
44        input_file_path = path;
45        input_file_parent_path = dunce::canonicalize(&config.file_path)?;
46    } else {
47        eyre::bail!("Invalid input path: `{}`", config.file_path.display());
48    }
49    tracing::info!("Input file path: `{}`", input_file_path.display());
50
51    let current_dir = CurrentDir::new(input_file_parent_path)?;
52
53    let bytes = fs::read(&input_file_path)?;
54    let markdown = simdutf8::basic::from_utf8(&bytes)?;
55    let mut parser = TextMergeWithOffset::new(
56        Parser::new_ext(markdown, Options::ENABLE_YAML_STYLE_METADATA_BLOCKS).into_offset_iter(),
57    );
58
59    let lang = check_metadata(&mut parser)?;
60
61    let max_width = (utils::terminal_size().0 / 2) as usize;
62    let mut char_set = HashSet::new();
63    let mut in_paragraph = false;
64    let mut word_count = 0;
65    parser.for_each(|(event, range)| match event {
66        Event::Start(tag) => match tag {
67            Tag::Heading { level, .. } => {
68                let title = markdown[range].trim_start_matches('#').trim();
69
70                if level == HeadingLevel::H1 {
71                    if !check_volume_title(title, lang) {
72                        println_msg(format!("Irregular volume title format: `{title}`"));
73                    }
74                } else if level == HeadingLevel::H2 {
75                    if !check_chapter_title(title, lang) {
76                        println_msg(format!("Irregular chapter title format: `{title}`"));
77                    }
78                } else {
79                    println_msg(format!(
80                        "Irregular heading level: `{level:?}`, content: `{title}`"
81                    ));
82                }
83            }
84            Tag::Image { dest_url, .. } => {
85                let image_path = Path::new(dest_url.as_ref());
86
87                if !image_path.is_file() {
88                    println_msg(format!("Image `{}` does not exist", image_path.display()));
89                }
90            }
91            Tag::Paragraph => {
92                in_paragraph = true;
93            }
94            Tag::BlockQuote(_)
95            | Tag::CodeBlock(_)
96            | Tag::List(_)
97            | Tag::Item
98            | Tag::FootnoteDefinition(_)
99            | Tag::Table(_)
100            | Tag::TableHead
101            | Tag::TableRow
102            | Tag::TableCell
103            | Tag::Emphasis
104            | Tag::Strong
105            | Tag::Strikethrough
106            | Tag::Link { .. }
107            | Tag::HtmlBlock
108            | Tag::MetadataBlock(_)
109            | Tag::DefinitionList
110            | Tag::DefinitionListTitle
111            | Tag::DefinitionListDefinition
112            | Tag::Superscript
113            | Tag::Subscript => {
114                if !config.basic_check {
115                    let content = console::truncate_str(markdown[range].trim(), max_width, "...");
116
117                    println_msg(format!(
118                        "Markdown tag that should not appear: `{tag:?}`, content: `{content}`"
119                    ));
120                }
121            }
122        },
123        Event::Text(text) => {
124            if !config.basic_check {
125                for c in text.chars() {
126                    if !utils::is_cjk(c)
127                        && !utils::is_punctuation(c)
128                        && !c.is_ascii_alphanumeric()
129                        && c != ' '
130                    {
131                        if char_set.contains(&c) {
132                            continue;
133                        } else {
134                            char_set.insert(c);
135
136                            println_msg(format!(
137                                "Irregular char: `{}`, at `{}`",
138                                c,
139                                console::truncate_str(
140                                    markdown[range.clone()].trim(),
141                                    max_width,
142                                    "..."
143                                )
144                            ));
145                        }
146                    }
147                }
148            }
149
150            if config.word_count {
151                for c in text.chars() {
152                    if utils::is_cjk(c) {
153                        word_count += 1;
154                    }
155                }
156            }
157        }
158        Event::End(tag) => {
159            if let TagEnd::Paragraph = tag {
160                in_paragraph = false;
161            }
162        }
163        Event::HardBreak
164        | Event::Code(_)
165        | Event::Html(_)
166        | Event::FootnoteReference(_)
167        | Event::SoftBreak
168        | Event::Rule
169        | Event::TaskListMarker(_)
170        | Event::InlineHtml(_)
171        | Event::InlineMath(_)
172        | Event::DisplayMath(_) => {
173            if !config.basic_check {
174                let content = console::truncate_str(markdown[range].trim(), max_width, "...");
175
176                println_msg(format!(
177                    "Markdown event that should not appear: `{event:?}`, content: `{content}`"
178                ));
179            }
180        }
181    });
182
183    if config.word_count {
184        println!("Total number of words: {word_count}");
185    }
186
187    current_dir.restore()?;
188
189    tracing::debug!("Time spent on `check`: {}", timing.elapsed()?);
190
191    Ok(())
192}
193
194fn check_metadata<'a, T>(parser: &mut TextMergeWithOffset<'a, T>) -> Result<Lang>
195where
196    T: Iterator<Item = (Event<'a>, Range<usize>)>,
197{
198    let metadata = utils::get_metadata(parser)?;
199
200    eyre::ensure!(
201        metadata.cover_image_is_ok(),
202        "Cover image does not exist: `{}`",
203        metadata.cover_image.unwrap().display()
204    );
205
206    Ok(metadata.lang)
207}
208
209fn println_msg(msg: String) {
210    println!("{} {}", utils::emoji("⚠️"), msg);
211}
212
/// Expands to a reference to a `regex::Regex` built from the given literal pattern.
///
/// Each expansion owns its own `static`, so the pattern is compiled on first use and reused
/// afterwards. Panics on first use if the pattern is not a valid regular expression.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        use std::sync::OnceLock;

        static COMPILED: OnceLock<regex::Regex> = OnceLock::new();
        COMPILED.get_or_init(|| regex::Regex::new($pattern).unwrap())
    }};
}
219
220#[must_use]
221fn check_chapter_title<T>(title: T, lang: Lang) -> bool
222where
223    T: AsRef<str>,
224{
225    let title = title.as_ref();
226
227    match lang {
228        Lang::ZhHant => {
229            let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章話] .+");
230            regex.is_match(title.as_ref())
231        }
232        Lang::ZhHans => {
233            let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章话] .+");
234            regex.is_match(title.as_ref())
235        }
236    }
237}
238
239#[must_use]
240fn check_volume_title<T>(title: T, lang: Lang) -> bool
241where
242    T: AsRef<str>,
243{
244    let title = title.as_ref();
245
246    match lang {
247        Lang::ZhHant => {
248            let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
249            regex.is_match(title) || title == "簡介"
250        }
251        Lang::ZhHans => {
252            let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
253            regex.is_match(title) || title == "简介"
254        }
255    }
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn check_chapter_title_test() {
        // Simplified Chinese.
        assert!(check_chapter_title("第一章 被俘虏的开始", Lang::ZhHans));
        assert!(check_chapter_title(
            "第一百三十二章 标标标标标标标标标",
            Lang::ZhHans
        ));
        assert!(check_chapter_title("第123章 标题标标标标", Lang::ZhHans));
        assert!(!check_chapter_title("第一章 ", Lang::ZhHans));
        assert!(!check_chapter_title("第1二3话", Lang::ZhHans));
        assert!(!check_chapter_title("第123话标题", Lang::ZhHans));
        assert!(!check_chapter_title("123话 标题", Lang::ZhHans));

        // Traditional Chinese, including the language-specific chapter marker.
        assert!(check_chapter_title("第一章 被俘虜的開始", Lang::ZhHant));
        assert!(check_chapter_title("第12話 標題", Lang::ZhHant));
        assert!(!check_chapter_title("第12话 标题", Lang::ZhHant));
        assert!(!check_chapter_title("第12話 標題", Lang::ZhHans));
    }

    #[test]
    fn check_volume_title_test() {
        // Simplified Chinese.
        assert!(check_volume_title(
            "第三十二卷 标标标标标标标标标",
            Lang::ZhHans
        ));
        assert!(!check_volume_title("第123话 标题标标标标", Lang::ZhHans));
        assert!(!check_volume_title("第1卷 ", Lang::ZhHans));

        // Traditional Chinese.
        assert!(check_volume_title("第1卷 標題", Lang::ZhHant));

        // The synopsis heading is accepted only in the matching script.
        assert!(check_volume_title("简介", Lang::ZhHans));
        assert!(check_volume_title("簡介", Lang::ZhHant));
        assert!(!check_volume_title("簡介", Lang::ZhHans));
        assert!(!check_volume_title("简介", Lang::ZhHant));
    }
}