novel_cli/utils/
convert.rs

1use std::sync::OnceLock;
2
3use color_eyre::eyre::Result;
4use novel_api::Timing;
5use opencc_rs::{Config, OpenCC};
6
7use super::{Content, Novel};
8use crate::cmd::Convert;
9
10pub fn convert<T>(novel: &mut Novel, converts: T) -> Result<()>
11where
12    T: AsRef<[Convert]>,
13{
14    if converts.as_ref().is_empty() {
15        return Ok(());
16    }
17
18    let mut timing = Timing::new();
19
20    novel.name = convert_str(&novel.name, &converts, false)?;
21
22    novel.author_name = convert_str(&novel.author_name, &converts, false)?;
23
24    if novel.introduction.is_some() {
25        #[allow(clippy::needless_borrows_for_generic_args)]
26        for line in novel.introduction.as_mut().unwrap() {
27            *line = convert_str(&line, &converts, false)?;
28        }
29    }
30
31    for volume in &mut novel.volumes {
32        volume.title = convert_str(&volume.title, &converts, true)?;
33
34        for chapter in &mut volume.chapters {
35            chapter.title = convert_str(&chapter.title, &converts, true)?;
36
37            if chapter.contents.is_some() {
38                let mut contents = chapter.contents.take().unwrap();
39
40                for content in &mut contents {
41                    #[allow(clippy::needless_borrows_for_generic_args)]
42                    if let Content::Text(line) = content {
43                        *line = convert_str(&line, &converts, false)?;
44                    }
45                }
46
47                chapter.contents = Some(contents);
48            }
49        }
50    }
51
52    tracing::debug!("Time spent on `convert`: {}", timing.elapsed()?);
53
54    Ok(())
55}
56
57pub fn convert_str<T, E>(str: T, converts: E, in_heading: bool) -> Result<String>
58where
59    T: AsRef<str>,
60    E: AsRef<[Convert]>,
61{
62    let converts = converts.as_ref();
63
64    if converts.is_empty() {
65        Ok(str.as_ref().to_string())
66    } else {
67        let mut result = String::new();
68
69        static OPENCC_S2T: OnceLock<OpenCC> = OnceLock::new();
70        static OPENCC_T2S: OnceLock<OpenCC> = OnceLock::new();
71        static OPENCC_JP2T2S: OnceLock<OpenCC> = OnceLock::new();
72
73        if converts.contains(&Convert::JP2T2S) {
74            result = OPENCC_JP2T2S
75                .get_or_init(|| {
76                    OpenCC::new(vec![Config::JP2T, Config::TW2S])
77                        .expect("OpenCC initialization failure")
78                })
79                .convert(&str)?;
80        } else if converts.contains(&Convert::T2S) {
81            result = OPENCC_T2S
82                .get_or_init(|| {
83                    OpenCC::new(vec![Config::TW2S]).expect("OpenCC initialization failure")
84                })
85                .convert(&str)?;
86        } else if converts.contains(&Convert::S2T) {
87            result = OPENCC_S2T
88                .get_or_init(|| {
89                    OpenCC::new(vec![Config::S2T]).expect("OpenCC initialization failure")
90                })
91                .convert(&str)?;
92        }
93
94        if converts.contains(&Convert::CUSTOM) {
95            if result.is_empty() {
96                result = custom_convert(str, in_heading);
97            } else {
98                result = custom_convert(result, in_heading);
99            }
100
101            if converts.contains(&Convert::JP2T2S) || converts.contains(&Convert::T2S) {
102                let mut new_result = String::with_capacity(result.len());
103                for c in result.chars() {
104                    match super::CONVERT_T2S_MAP.get(&c) {
105                        Some(new) => {
106                            new_result.push(*new);
107                        }
108                        None => new_result.push(c),
109                    }
110                }
111
112                result = new_result;
113            }
114        }
115
116        Ok(result.trim().to_string())
117    }
118}
119
120#[must_use]
121fn custom_convert<T>(str: T, in_heading: bool) -> String
122where
123    T: AsRef<str>,
124{
125    if str.as_ref().is_empty() {
126        return String::default();
127    }
128
129    let mut s = String::new();
130    for c in html_escape::decode_html_entities(str.as_ref())
131        .to_string()
132        .chars()
133    {
134        match super::CONVERT_MAP.get(&c) {
135            Some(new) => {
136                s.push(*new);
137            }
138            None => s.push(c),
139        }
140    }
141
142    let mut result = String::new();
143    for (c, next_c) in s.chars().zip(s.chars().skip(1)) {
144        do_custom_convert(c, Some(next_c), &mut result, in_heading);
145    }
146    do_custom_convert(s.chars().last().unwrap(), None, &mut result, in_heading);
147
148    result
149}
150
151fn do_custom_convert(c: char, next_c: Option<char>, result: &mut String, in_heading: bool) {
152    let space = ' ';
153    let last = result.chars().last();
154
155    if
156    // https://en.wikipedia.org/wiki/Zero-width_space
157    c == '\u{200B}'
158        // https://en.wikipedia.org/wiki/Zero-width_non-joiner
159        || c == '\u{200C}'
160        // https://en.wikipedia.org/wiki/Zero-width_joiner
161        || c == '\u{200D}'
162        // https://en.wikipedia.org/wiki/Word_joiner
163        || c == '\u{2060}'
164        // https://en.wikipedia.org/wiki/Byte_order_mark
165        || c == '\u{FEFF}'
166        // https://en.wikipedia.org/wiki/Unicode_control_characters
167        || c.is_control()
168    {
169        // do nothing
170    } else if c.is_whitespace() {
171        if last.is_some_and(|c| !super::is_punctuation(c)) {
172            result.push(space)
173        }
174    } else if super::is_punctuation(c) {
175        if !in_heading && last.is_some_and(|c| c.is_whitespace()) {
176            result.pop();
177        }
178
179        if c == ':' {
180            // e.g. 08:00
181            if last.is_some_and(|c| c.is_ascii_digit())
182                && next_c.is_some_and(|c| c.is_ascii_digit())
183            {
184                result.push(':');
185            } else {
186                result.push(':');
187            }
188        } else {
189            result.push(c);
190        }
191    } else {
192        result.push(c);
193    }
194}
195
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use testresult::TestResult;

    use super::*;

    /// Checks `convert_str` with `JP2T2S` + `CUSTOM` against a table of
    /// `(input, in_heading, expected)` cases.
    #[test]
    fn convert() -> TestResult {
        let config = [Convert::JP2T2S, Convert::CUSTOM];

        let cases = [
            ("幺", false, "幺"),
            ("妳", false, "你"),
            ("Q0", false, "Q0"),
            ("“安装后”", false, "“安装后”"),
            ("&amp;", false, "&"),
            ("安裝後?", false, "安装后?"),
            ("安 装", false, "安 装"),
            ("你\n好", false, "你好"),
            ("08:00", false, "08:00"),
            ("接著", false, "接着"),
            // Outside a heading the space before punctuation is removed ...
            ("第一章 “你好”", false, "第一章“你好”"),
            // ... inside a heading it is kept.
            ("第一章 “你好”", true, "第一章 “你好”"),
        ];

        for (input, in_heading, expected) in cases {
            assert_eq!(convert_str(input, &config, in_heading)?, expected);
        }

        Ok(())
    }
}