// novel_cli/cmd/check.rs

1use std::{
2    fs,
3    ops::Range,
4    path::{Path, PathBuf},
5};
6
7use clap::Args;
8use color_eyre::eyre::{self, Result};
9use fluent_templates::Loader;
10use hashbrown::HashSet;
11use novel_api::Timing;
12use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd, TextMergeWithOffset};
13
14use crate::{
15    LANG_ID, LOCALES,
16    utils::{self, CurrentDir, Lang},
17};
18
19#[must_use]
20#[derive(Args)]
21#[command(arg_required_else_help = true,
22    about = LOCALES.lookup(&LANG_ID, "check_command"))]
23pub struct Check {
24    #[arg(help = LOCALES.lookup(&LANG_ID, "file_path"))]
25    pub file_path: PathBuf,
26
27    #[arg(long, default_value_t = false,
28        help = LOCALES.lookup(&LANG_ID, "basic_check"))]
29    pub basic_check: bool,
30
31    #[arg(long, default_value_t = false,
32        help = LOCALES.lookup(&LANG_ID, "word_count"))]
33    pub word_count: bool,
34}
35
36pub fn execute(config: Check) -> Result<()> {
37    let mut timing = Timing::new();
38
39    let input_file_path;
40    let input_file_parent_path;
41
42    if utils::is_markdown_or_txt_file(&config.file_path)? {
43        input_file_path = dunce::canonicalize(&config.file_path)?;
44        input_file_parent_path = input_file_path.parent().unwrap().to_path_buf();
45    } else if let Ok(Some(path)) =
46        utils::try_get_markdown_or_txt_file_name_in_dir(&config.file_path)
47    {
48        input_file_path = path;
49        input_file_parent_path = dunce::canonicalize(&config.file_path)?;
50    } else {
51        eyre::bail!("Invalid input path: `{}`", config.file_path.display());
52    }
53    tracing::info!("Input file path: `{}`", input_file_path.display());
54
55    let current_dir = CurrentDir::new(input_file_parent_path)?;
56
57    let bytes = fs::read(&input_file_path)?;
58    let markdown = simdutf8::basic::from_utf8(&bytes)?;
59    let mut parser = TextMergeWithOffset::new(
60        Parser::new_ext(markdown, Options::ENABLE_YAML_STYLE_METADATA_BLOCKS).into_offset_iter(),
61    );
62
63    let lang = check_metadata(&mut parser)?;
64
65    let max_width = (utils::terminal_size().0 / 2) as usize;
66    let mut char_set = HashSet::new();
67    let mut in_paragraph = false;
68    let mut word_count = 0;
69    parser.for_each(|(event, range)| match event {
70        Event::Start(tag) => match tag {
71            Tag::Heading { level, .. } => {
72                let title = markdown[range].trim_start_matches('#').trim();
73
74                if level == HeadingLevel::H1 {
75                    if !check_volume_title(title, lang) {
76                        println_msg(format!("Irregular volume title format: `{title}`"));
77                    }
78                } else if level == HeadingLevel::H2 {
79                    if !check_chapter_title(title, lang) {
80                        println_msg(format!("Irregular chapter title format: `{title}`"));
81                    }
82                } else {
83                    println_msg(format!(
84                        "Irregular heading level: `{level:?}`, content: `{title}`"
85                    ));
86                }
87            }
88            Tag::Image { dest_url, .. } => {
89                let image_path = Path::new(dest_url.as_ref());
90
91                if !image_path.is_file() {
92                    println_msg(format!("Image `{}` does not exist", image_path.display()));
93                }
94            }
95            Tag::Paragraph => {
96                in_paragraph = true;
97            }
98            Tag::BlockQuote(_)
99            | Tag::CodeBlock(_)
100            | Tag::List(_)
101            | Tag::Item
102            | Tag::FootnoteDefinition(_)
103            | Tag::Table(_)
104            | Tag::TableHead
105            | Tag::TableRow
106            | Tag::TableCell
107            | Tag::Emphasis
108            | Tag::Strong
109            | Tag::Strikethrough
110            | Tag::Link { .. }
111            | Tag::HtmlBlock
112            | Tag::MetadataBlock(_)
113            | Tag::DefinitionList
114            | Tag::DefinitionListTitle
115            | Tag::DefinitionListDefinition
116            | Tag::Superscript
117            | Tag::Subscript => {
118                if !config.basic_check {
119                    let content = console::truncate_str(markdown[range].trim(), max_width, "...");
120
121                    println_msg(format!(
122                        "Markdown tag that should not appear: `{tag:?}`, content: `{content}`"
123                    ));
124                }
125            }
126        },
127        Event::Text(text) => {
128            if !config.basic_check {
129                for c in text.chars() {
130                    if !utils::is_cjk(c)
131                        && !utils::is_punctuation(c)
132                        && !c.is_ascii_alphanumeric()
133                        && c != ' '
134                    {
135                        if char_set.contains(&c) {
136                            continue;
137                        } else {
138                            char_set.insert(c);
139
140                            println_msg(format!(
141                                "Irregular char: `{}`, at `{}`",
142                                c,
143                                console::truncate_str(
144                                    markdown[range.clone()].trim(),
145                                    max_width,
146                                    "..."
147                                )
148                            ));
149                        }
150                    }
151                }
152            }
153
154            if config.word_count {
155                for c in text.chars() {
156                    if utils::is_cjk(c) {
157                        word_count += 1;
158                    }
159                }
160            }
161        }
162        Event::End(tag) => {
163            if let TagEnd::Paragraph = tag {
164                in_paragraph = false;
165            }
166        }
167        Event::HardBreak
168        | Event::Code(_)
169        | Event::Html(_)
170        | Event::FootnoteReference(_)
171        | Event::SoftBreak
172        | Event::Rule
173        | Event::TaskListMarker(_)
174        | Event::InlineHtml(_)
175        | Event::InlineMath(_)
176        | Event::DisplayMath(_) => {
177            if !config.basic_check {
178                let content = console::truncate_str(markdown[range].trim(), max_width, "...");
179
180                println_msg(format!(
181                    "Markdown event that should not appear: `{event:?}`, content: `{content}`"
182                ));
183            }
184        }
185    });
186
187    if config.word_count {
188        println!("Total number of words: {word_count}");
189    }
190
191    current_dir.restore()?;
192
193    tracing::debug!("Time spent on `check`: {}", timing.elapsed()?);
194
195    Ok(())
196}
197
198fn check_metadata<'a, T>(parser: &mut TextMergeWithOffset<'a, T>) -> Result<Lang>
199where
200    T: Iterator<Item = (Event<'a>, Range<usize>)>,
201{
202    let metadata = utils::get_metadata(parser)?;
203
204    eyre::ensure!(
205        metadata.cover_image_is_ok(),
206        "Cover image does not exist: `{}`",
207        metadata.cover_image.unwrap().display()
208    );
209
210    Ok(metadata.lang)
211}
212
213fn println_msg(msg: String) {
214    println!("{} {}", utils::emoji("⚠️"), msg);
215}
216
/// Expands to a reference to a `regex::Regex` built from the given literal.
///
/// Every expansion owns its own `static` cell, so the pattern is compiled on
/// first use of that call site and reused afterwards.
///
/// # Panics
///
/// Panics on first use if the literal is not a valid regular expression.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        static COMPILED: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
        COMPILED.get_or_init(|| regex::Regex::new($pattern).unwrap())
    }};
}
223
224#[must_use]
225fn check_chapter_title<T>(title: T, lang: Lang) -> bool
226where
227    T: AsRef<str>,
228{
229    let title = title.as_ref();
230
231    match lang {
232        Lang::ZhHant => {
233            let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章話] .+");
234            regex.is_match(title.as_ref())
235        }
236        Lang::ZhHans => {
237            let regex = regex!(r"第([零一二三四五六七八九十百千]|[0-9]){1,7}[章话] .+");
238            regex.is_match(title.as_ref())
239        }
240    }
241}
242
243#[must_use]
244fn check_volume_title<T>(title: T, lang: Lang) -> bool
245where
246    T: AsRef<str>,
247{
248    let title = title.as_ref();
249
250    match lang {
251        Lang::ZhHant => {
252            let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
253            regex.is_match(title) || title == "簡介"
254        }
255        Lang::ZhHans => {
256            let regex = regex!(r"第([一二三四五六七八九十]|[0-9]){1,3}卷 .+");
257            regex.is_match(title) || title == "简介"
258        }
259    }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Simplified-Chinese chapter titles: well-formed ones are accepted;
    /// titles lacking the name, the separating space or the leading `第` are
    /// rejected.
    #[test]
    fn check_chapter_title_test() {
        let accepted = [
            "第一章 被俘虏的开始",
            "第一百三十二章 标标标标标标标标标",
            "第123章 标题标标标标",
        ];
        for title in accepted {
            assert!(
                check_chapter_title(title, Lang::ZhHans),
                "`{title}` should be accepted"
            );
        }

        let rejected = ["第一章 ", "第1二3话", "第123话标题", "123话 标题"];
        for title in rejected {
            assert!(
                !check_chapter_title(title, Lang::ZhHans),
                "`{title}` should be rejected"
            );
        }
    }

    /// Simplified-Chinese volume titles: a well-formed one is accepted; a
    /// chapter-style title and a title without a name are rejected.
    #[test]
    fn check_volume_title_test() {
        let accepted = ["第三十二卷 标标标标标标标标标"];
        for title in accepted {
            assert!(
                check_volume_title(title, Lang::ZhHans),
                "`{title}` should be accepted"
            );
        }

        let rejected = ["第123话 标题标标标标", "第1卷 "];
        for title in rejected {
            assert!(
                !check_volume_title(title, Lang::ZhHans),
                "`{title}` should be rejected"
            );
        }
    }
}
// novel_cli/cmd/check.rs